4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Ganeti small utilities
43 from cStringIO import StringIO
45 from ganeti import logger
46 from ganeti import errors
47 from ganeti import constants
51 _re_shell_unquoted = re.compile('^[-.,=:/_+@A-Za-z0-9]+$')
class RunResult(object):
  """Container for the outcome of an external program run.

  Instance variables:
    exit_code: the exit code of the program, or None
    signal: numeric signal that caused the program to finish, or None
            (if the program wasn't terminated by a signal)
    stdout: the standard output of the program
    stderr: the standard error of the program
    failed: True if the program was terminated by a signal or if
            exit_code is anything other than 0 (this includes None)
    fail_reason: a string detailing the termination reason
    cmd: the command, exactly as handed to the constructor
    output: read-only property, stdout followed by stderr

  """
  __slots__ = [
    "exit_code", "signal", "stdout", "stderr",
    "failed", "fail_reason", "cmd",
    ]

  def __init__(self, exit_code, signal, stdout, stderr, cmd):
    """Store the raw results and derive failed/fail_reason from them.

    """
    self.cmd = cmd
    self.exit_code = exit_code
    self.signal = signal
    self.stdout = stdout
    self.stderr = stderr
    self.failed = (signal is not None or exit_code != 0)

    # A signal takes precedence over the exit code when explaining the result
    if signal is not None:
      reason = "terminated by signal %s" % signal
    elif exit_code is not None:
      reason = "exited with exit code %s" % exit_code
    else:
      reason = "unable to determine termination reason"
    self.fail_reason = reason

    # 'debug' is a module-level name; failures are only logged when it is set
    if debug and self.failed:
      logger.Debug("Command '%s' failed (%s); output: %s" %
                   (self.cmd, self.fail_reason, self.output))

  def _GetOutput(self):
    """Return stdout and stderr joined together, stdout first.

    """
    return self.stdout + self.stderr

  output = property(_GetOutput, None, None, "Return full output")
def _GetLockFile(subsystem):
  """Return the path of the lock file belonging to a lock name.

  The file is placed in constants.LOCK_DIR and is named
  "ganeti_lock_" followed by the subsystem name.

  """
  lock_dir = constants.LOCK_DIR
  return "%s/ganeti_lock_%s" % (lock_dir, subsystem)
109 def Lock(name, max_retries=None, debug=False):
110 """Lock a given subsystem.
112 In case the lock is already held by an alive process, the function
113 will sleep indefinitely and poll with a one second interval.
115 When the optional integer argument 'max_retries' is passed with a
116 non-zero value, the function will sleep only for this number of
117 times, and then it will raise a LockError if the lock can't be
118 acquired. Passing in a negative number will cause only one try to
119 get the lock. Passing a positive number will make the function retry
120 for approximately that number of seconds.
123 lockfile = _GetLockFile(name)
125 if name in _locksheld:
126 raise errors.LockError('Lock "%s" already held!' % (name,))
133 fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR | os.O_SYNC)
135 except OSError, creat_err:
136 if creat_err.errno != errno.EEXIST:
137 raise errors.LockError("Can't create the lock file. Error '%s'." %
141 pf = open(lockfile, 'r')
142 except IOError, open_err:
145 raise errors.LockError("Lock file exists but cannot be opened."
146 " Error: '%s'." % str(open_err))
153 raise errors.LockError("Invalid pid string in %s" %
156 if not IsProcessAlive(pid):
157 raise errors.LockError("Stale lockfile %s for pid %d?" %
160 if max_retries and max_retries <= retries:
161 raise errors.LockError("Can't acquire lock during the specified"
163 if retries == 5 and (debug or sys.stdin.isatty()):
164 logger.ToStderr("Waiting for '%s' lock from pid %d..." % (name, pid))
170 os.write(fd, '%d\n' % (os.getpid(),))
173 _locksheld.append(name)
177 """Unlock a given subsystem.
180 lockfile = _GetLockFile(name)
183 fd = os.open(lockfile, os.O_RDONLY)
185 raise errors.LockError('Lock "%s" not held.' % (name,))
187 f = os.fdopen(fd, 'r')
193 raise errors.LockError('Unable to determine PID of locking process.')
195 if pid != os.getpid():
196 raise errors.LockError('Lock not held by me (%d != %d)' %
200 _locksheld.remove(name)
207 for lock in _locksheld:
212 """Execute a (shell) command.
214 The command should not read from its standard input, as it will be
218 cmd: command to run. (str)
220 Returns: `RunResult` instance
224 raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled")
226 if isinstance(cmd, list):
227 cmd = [str(val) for val in cmd]
228 strcmd = " ".join(cmd)
233 env = os.environ.copy()
235 poller = select.poll()
236 child = subprocess.Popen(cmd, shell=shell,
237 stderr=subprocess.PIPE,
238 stdout=subprocess.PIPE,
239 stdin=subprocess.PIPE,
240 close_fds=True, env=env)
243 poller.register(child.stdout, select.POLLIN)
244 poller.register(child.stderr, select.POLLIN)
248 child.stdout.fileno(): (out, child.stdout),
249 child.stderr.fileno(): (err, child.stderr),
252 status = fcntl.fcntl(fd, fcntl.F_GETFL)
253 fcntl.fcntl(fd, fcntl.F_SETFL, status | os.O_NONBLOCK)
256 for fd, event in poller.poll():
257 if event & select.POLLIN or event & select.POLLPRI:
258 data = fdmap[fd][1].read()
259 # no data from read signifies EOF (the same as POLLHUP)
261 poller.unregister(fd)
264 fdmap[fd][0].write(data)
265 if (event & select.POLLNVAL or event & select.POLLHUP or
266 event & select.POLLERR):
267 poller.unregister(fd)
273 status = child.wait()
281 return RunResult(exitcode, signal, out, err, strcmd)
284 def RunCmdUnlocked(cmd):
285 """Execute a shell command without the 'cmd' lock.
287 This variant of `RunCmd()` drops the 'cmd' lock before running the
288 command and re-acquires it afterwards, thus it can be used to call
289 other ganeti commands.
291 The argument and return values are the same as for the `RunCmd()`
295 cmd - command to run. (str)
308 def RemoveFile(filename):
309 """Remove a file ignoring some errors.
311 Remove a file, ignoring non-existing ones or directories. Other
318 if err.errno not in (errno.ENOENT, errno.EISDIR):
322 def _FingerprintFile(filename):
323 """Compute the fingerprint of a file.
325 If the file does not exist, a None will be returned
329 filename - Filename (str)
332 if not (os.path.exists(filename) and os.path.isfile(filename)):
345 return fp.hexdigest()
def FingerprintFiles(files):
  """Compute fingerprints for a list of files.

  Args:
    files - array of filenames. ( [str, ...] )

  Returns:
    dictionary of filename: fingerprint for the files that exist; names
    for which _FingerprintFile() yields no fingerprint are left out

  """
  pairs = [(filename, _FingerprintFile(filename)) for filename in files]
  return dict([(filename, cksum) for (filename, cksum) in pairs if cksum])
def CheckDict(target, template, logname=None):
  """Make sure a dictionary contains a required set of keys.

  Every key of `template` that is absent from `target` is added to
  `target` (which is modified in place) with the value it has in
  `template`. Keys already present in `target` are left untouched.

  Args:
    target   - the dictionary to check
    template - template dictionary
    logname  - a caller-chosen string to identify the debug log
               entry; if None, no logging will be done

  Returns nothing.

  """
  absent = [key for key in template if key not in target]
  for key in absent:
    target[key] = template[key]

  # Only report when something was actually filled in and a name was given
  if absent and logname:
    logger.Debug('%s missing keys %s' %
                 (logname, ', '.join(absent)))
396 def IsProcessAlive(pid):
397 """Check if a given pid exists on the system.
399 Returns: true or false, depending on if the pid exists or not
401 Remarks: zombie processes treated as not alive
405 f = open("/proc/%d/status" % pid)
407 if err.errno in (errno.ENOENT, errno.ENOTDIR):
414 state = data[1].split()
415 if len(state) > 1 and state[1] == "Z":
def MatchNameComponent(key, name_list):
  """Try to match a name against a list.

  This function will try to match a name like test1 against a list
  like ['test1.example.com', 'test2.example.com', ...]. Against this
  list, 'test1' as well as 'test1.example' will match, but not
  'test1.ex'. A multiple match will be considered as no match at all
  (e.g. 'test1' against ['test1.example.com', 'test1.example.org']).

  A key that is itself an element of the list always matches that
  element: a fully spelled-out name is not ambiguous, even when some
  other entry happens to extend it (e.g. 'node1' against
  ['node1', 'node1.example.com']).

  Args:
    key: the name to be searched
    name_list: the list of strings against which to search the key

  Returns:
    None if there is no match *or* if there are multiple matches
    otherwise the element from the list which matches

  """
  names = list(name_list)

  # Exact hits win outright, before any component-wise matching
  if key in names:
    return key

  # The key must be followed by either nothing or a dot-separated suffix
  pattern = re.compile(r"^%s(\..*)?$" % re.escape(key))
  candidates = [name for name in names if pattern.match(name) is not None]
  if len(candidates) != 1:
    return None
  return candidates[0]
449 """Class implementing resolver and hostname functionality
452 def __init__(self, name=None):
453 """Initialize the host name object.
455 If the name argument is not passed, it will use this system's
460 name = self.SysName()
463 self.name, self.aliases, self.ipaddrs = self.LookupHostname(name)
464 self.ip = self.ipaddrs[0]
467 """Returns the hostname without domain.
470 return self.name.split('.')[0]
474 """Return the current system's name.
476 This is simply a wrapper over socket.gethostname()
479 return socket.gethostname()
482 def LookupHostname(hostname):
486 hostname: hostname to look up
489 a tuple (name, aliases, ipaddrs) as returned by socket.gethostbyname_ex
490 in case of errors in resolving, we raise a ResolverError
494 result = socket.gethostbyname_ex(hostname)
495 except socket.gaierror, err:
496 # hostname not found in DNS
497 raise errors.ResolverError(hostname, err.args[0], err.args[1])
502 def ListVolumeGroups():
503 """List volume groups and their size
506 Dictionary with keys volume name and values the size of the volume
509 command = "vgs --noheadings --units m --nosuffix -o name,size"
510 result = RunCmd(command)
515 for line in result.stdout.splitlines():
517 name, size = line.split()
518 size = int(float(size))
519 except (IndexError, ValueError), err:
520 logger.Error("Invalid output from vgs (%s): %s" % (err, line))
def BridgeExists(bridge):
  """Tell whether a bridge with the given name exists on this system.

  The check looks for the directory /sys/class/net/<bridge>/bridge.

  Returns:
    True if it does, false otherwise.

  """
  sysfs_dir = "/sys/class/net/%s/bridge" % bridge
  return os.path.isdir(sysfs_dir)
def NiceSort(name_list):
  """Sort a list of strings based on digit and non-digit groupings.

  Given a list of names ['a1', 'a10', 'a11', 'a2'] this function will
  sort the list in the logical order ['a1', 'a2', 'a10', 'a11'].

  The sort algorithm breaks each name in groups of either only-digits
  or no-digits. Only the first eight such groups are considered; names
  whose first eight groups are equal are ordered by plain string
  comparison of the whole name.

  Digit groups are compared numerically and sort before non-digit
  groups; a name that runs out of groups sorts before a longer name
  starting with the same groups. The empty string is accepted and
  sorts first.

  Return value
    - a copy of the list sorted according to our algorithm

  """
  max_groups = 8
  # Exactly one of the two capture groups is non-empty for every hit
  group_re = re.compile(r"(\d+)|(\D+)")

  def _SortKey(name):
    """Builds the comparison key for a single name."""
    groups = []
    for (digits, text) in group_re.findall(name)[:max_groups]:
      # The leading tag keeps numbers and text apart, so the two kinds
      # are never compared with each other directly
      if digits:
        groups.append((0, int(digits)))
      else:
        groups.append((1, text))
    return (groups, name)

  return sorted(name_list, key=_SortKey)
def TryConvert(fn, val):
  """Apply a conversion function to a value, tolerating failures.

  `fn` is called with `val` as its only argument. If the call raises
  neither ValueError nor TypeError its result is returned; if it
  raises one of those two, the original value is returned unchanged.
  Any other exception is propagated to the caller.

  """
  try:
    return fn(val)
  except (ValueError, TypeError):
    return val
589 """Verifies the syntax of an IP address.
591 This function checks if the ip address passed is valid or not based
592 on syntax (not ip range, class calculations or anything).
595 unit = "(0|[1-9]\d{0,2})"
596 return re.match("^%s\.%s\.%s\.%s$" % (unit, unit, unit, unit), ip)
def IsValidShellParam(word):
  """Verifies if the given word is safe from the shell's p.o.v.

  This means that we can pass this to a command via the shell and be
  sure that it doesn't alter the command line and is passed as such to
  the actual command.

  Note that we are overly restrictive here, in order to be on the safe
  side: only letters, digits and the characters - . _ + / : % @ are
  accepted, and the empty string is rejected.

  Returns:
    True if the word is safe, False otherwise

  """
  # \Z instead of $: $ would also match before a trailing newline and
  # thereby accept "word\n", which splits the command line in two
  return bool(re.match(r"^[-a-zA-Z0-9._+/:%@]+\Z", word))
def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  Every element of args is checked with IsValidShellParam(), i.e. it
  must not contain shell metacharacters. If all of them pass, the
  result of "template % args" is returned.

  Raises:
    errors.ProgrammerError: for the first argument failing the check

  """
  for arg in args:
    if IsValidShellParam(arg):
      continue
    message = "Shell argument '%s' contains invalid characters" % arg
    raise errors.ProgrammerError(message)

  return template % args
def FormatUnit(value):
  """Render a number of MiB as a string with a fitting unit suffix.

  Values below 1024 are shown as whole mebibytes ("M"), values below
  1024 * 1024 as gibibytes ("G") and everything else as tebibytes
  ("T"); the latter two carry one decimal place.

  Value needs to be passed as a numeric type. Return value is always a string.

  """
  gib = 1024
  tib = 1024 * 1024

  if value < gib:
    return "%dM" % round(value, 0)

  if value < tib:
    return "%0.1fG" % round(float(value) / 1024, 1)

  return "%0.1fT" % round(float(value) / 1024 / 1024, 1)
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format NUMBER+ [DOT NUMBER+] SPACE* [UNIT]. If no unit
  is specified, it defaults to MiB. Return value is always an int in MiB.

  Recognized units (case-insensitive) are m/mb/mib, g/gb/gib and
  t/tb/tib. The result is rounded up to a whole number of MiB and then
  up to the next multiple of 4.

  Raises:
    errors.UnitParseError: if the string doesn't have the expected
      format, the numeric part is not a valid number (e.g. "1.2.3"
      or ".") or the unit is unknown

  """
  m = re.match(r'^([.\d]+)\s*([a-zA-Z]+)?$', input_string)
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  # The pattern above is deliberately loose about dots, so the real
  # validation of the number happens here
  try:
    value = float(number)
  except ValueError:
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = 'm'

  if lcunit in ('m', 'mb', 'mib'):
    # Value already in MiB
    pass

  elif lcunit in ('g', 'gb', 'gib'):
    value *= 1024

  elif lcunit in ('t', 'tb', 'tib'):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
689 def AddAuthorizedKey(file_name, key):
690 """Adds an SSH public key to an authorized_keys file.
693 file_name: Path to authorized_keys file
694 key: String containing key
696 key_fields = key.split()
698 f = open(file_name, 'a+')
702 # Ignore whitespace changes
703 if line.split() == key_fields:
705 nl = line.endswith('\n')
709 f.write(key.rstrip('\r\n'))
716 def RemoveAuthorizedKey(file_name, key):
717 """Removes an SSH public key from an authorized_keys file.
720 file_name: Path to authorized_keys file
721 key: String containing key
723 key_fields = key.split()
725 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
727 out = os.fdopen(fd, 'w')
729 f = open(file_name, 'r')
732 # Ignore whitespace changes while comparing lines
733 if line.split() != key_fields:
737 os.rename(tmpname, file_name)
747 def SetEtcHostsEntry(file_name, ip, hostname, aliases):
748 """Sets the name of an IP address and hostname in /etc/hosts.
751 # Ensure aliases are unique
752 aliases = UniqueSequence([hostname] + aliases)[1:]
754 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
756 out = os.fdopen(fd, 'w')
758 f = open(file_name, 'r')
762 fields = line.split()
763 if fields and not fields[0].startswith('#') and ip == fields[0]:
767 out.write("%s\t%s" % (ip, hostname))
769 out.write(" %s" % ' '.join(aliases))
774 os.rename(tmpname, file_name)
def AddHostToEtcHosts(hostname):
  """Wrapper around SetEtcHostsEntry.

  Resolves the given name through HostInfo and records the resulting
  IP address and name in constants.ETC_HOSTS, using the short name as
  the only alias.

  """
  host = HostInfo(name=hostname)
  aliases = [host.ShortName()]
  SetEtcHostsEntry(constants.ETC_HOSTS, host.ip, host.name, aliases)
792 def RemoveEtcHostsEntry(file_name, hostname):
793 """Removes a hostname from /etc/hosts.
795 IP addresses without names are removed from the file.
797 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
799 out = os.fdopen(fd, 'w')
801 f = open(file_name, 'r')
804 fields = line.split()
805 if len(fields) > 1 and not fields[0].startswith('#'):
807 if hostname in names:
808 while hostname in names:
809 names.remove(hostname)
811 out.write("%s %s\n" % (fields[0], ' '.join(names)))
818 os.rename(tmpname, file_name)
def RemoveHostFromEtcHosts(hostname):
  """Wrapper around RemoveEtcHostsEntry.

  Resolves the given name through HostInfo and removes first the
  resolved name and then its short form from constants.ETC_HOSTS.

  """
  host = HostInfo(name=hostname)
  for name in (host.name, host.ShortName()):
    RemoveEtcHostsEntry(constants.ETC_HOSTS, name)
837 def CreateBackup(file_name):
838 """Creates a backup of a file.
840 Returns: the path to the newly created backup file.
843 if not os.path.isfile(file_name):
844 raise errors.ProgrammerError("Can't make a backup of a non-file '%s'" %
847 prefix = '%s.backup-%d.' % (os.path.basename(file_name), int(time.time()))
848 dir_name = os.path.dirname(file_name)
850 fsrc = open(file_name, 'rb')
852 (fd, backup_name) = tempfile.mkstemp(prefix=prefix, dir=dir_name)
853 fdst = os.fdopen(fd, 'wb')
855 shutil.copyfileobj(fsrc, fdst)
def ShellQuote(value):
  """Quotes shell argument according to POSIX.

  Values consisting only of characters accepted by _re_shell_unquoted
  are returned unchanged. Everything else is wrapped in single quotes,
  with every embedded single quote replaced by the sequence '\\''.

  Args:
    value: the string to quote

  Returns:
    the string, safe for inclusion in a shell command line

  """
  safe = _re_shell_unquoted.match(value)
  # The match has to cover the whole value: a pattern ending in "$" also
  # matches right before a trailing newline, and such a value must not be
  # emitted unquoted
  if safe is not None and safe.end() == len(value):
    return value

  return "'%s'" % value.replace("'", "'\\''")
def ShellQuoteArgs(args):
  """Quote each argument with ShellQuote() and join them with spaces.

  """
  quoted = []
  for arg in args:
    quoted.append(ShellQuote(arg))
  return ' '.join(quoted)
881 def TcpPing(target, port, timeout=10, live_port_needed=False, source=None):
882 """Simple ping implementation using TCP connect(2).
884 Try to do a TCP connect(2) from an optional source IP to the
885 specified target IP and the specified target port. If the optional
886 parameter live_port_needed is set to true, requires the remote end
887 to accept the connection. The timeout is specified in seconds and
888 defaults to 10 seconds. If the source optional argument is not
889 passed, the source address selection is left to the kernel,
890 otherwise we try to connect using the passed address (failures to
891 bind other than EADDRNOTAVAIL will be ignored).
894 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
898 if source is not None:
900 sock.bind((source, 0))
901 except socket.error, (errcode, errstring):
902 if errcode == errno.EADDRNOTAVAIL:
905 sock.settimeout(timeout)
908 sock.connect((target, port))
911 except socket.timeout:
913 except socket.error, (errcode, errstring):
914 success = (not live_port_needed) and (errcode == errno.ECONNREFUSED)
def ListVisibleFiles(path):
  """Return the names of all directory entries not starting with a dot.

  The names are returned in the order os.listdir() reports them.

  """
  visible = []
  for entry in os.listdir(path):
    if entry.startswith("."):
      continue
    visible.append(entry)
  return visible
928 def GetHomeDir(user, default=None):
929 """Try to get the homedir of the given user.
931 The user can be passed either as a string (denoting the name) or as
932 an integer (denoting the user id). If the user is not found, the
933 'default' argument is returned, which defaults to None.
937 if isinstance(user, basestring):
938 result = pwd.getpwnam(user)
939 elif isinstance(user, (int, long)):
940 result = pwd.getpwuid(user)
942 raise errors.ProgrammerError("Invalid type passed to GetHomeDir (%s)" %
950 """Returns a random UUID.
953 f = open("/proc/sys/kernel/random/uuid", "r")
955 return f.read(128).rstrip("\n")
960 def WriteFile(file_name, fn=None, data=None,
961 mode=None, uid=-1, gid=-1,
962 atime=None, mtime=None, close=True,
963 dry_run=False, backup=False,
964 prewrite=None, postwrite=None):
965 """(Over)write a file atomically.
967 The file_name and either fn (a function taking one argument, the
968 file descriptor, and which should write the data to it) or data (the
969 contents of the file) must be passed. The other arguments are
970 optional and allow setting the file mode, owner and group, and the
971 mtime/atime of the file.
973 If the function doesn't raise an exception, it has succeeded and the
974 target file has the new contents. If the file has raised an
975 exception, an existing target file should be unmodified and the
976 temporary file should be removed.
979 file_name: New filename
980 fn: Content writing function, called with file descriptor as parameter
981 data: Content as string
986 mtime: Modification time
987 close: Whether to close file after writing it
988 prewrite: Function object called before writing content
989 postwrite: Function object called after writing content
992 None if "close" parameter evaluates to True, otherwise file descriptor.
995 if not os.path.isabs(file_name):
996 raise errors.ProgrammerError("Path passed to WriteFile is not"
997 " absolute: '%s'" % file_name)
999 if [fn, data].count(None) != 1:
1000 raise errors.ProgrammerError("fn or data required")
1002 if [atime, mtime].count(None) == 1:
1003 raise errors.ProgrammerError("Both atime and mtime must be either"
1006 if backup and not dry_run and os.path.isfile(file_name):
1007 CreateBackup(file_name)
1009 dir_name, base_name = os.path.split(file_name)
1010 fd, new_name = tempfile.mkstemp('.new', base_name, dir_name)
1011 # here we need to make sure we remove the temp file, if any error
1012 # leaves it in place
1014 if uid != -1 or gid != -1:
1015 os.chown(new_name, uid, gid)
1017 os.chmod(new_name, mode)
1018 if callable(prewrite):
1020 if data is not None:
1024 if callable(postwrite):
1027 if atime is not None and mtime is not None:
1028 os.utime(new_name, (atime, mtime))
1030 os.rename(new_name, file_name)
1037 RemoveFile(new_name)
def all(seq, pred=bool):
  """Returns True if pred(x) is True for every element in the iterable

  Evaluation stops at the first element for which pred(x) is false.
  An empty iterable yields True.

  """
  if pred is None:
    # Same meaning as in the itertools filters: test the element itself
    pred = bool
  for elem in seq:
    if not pred(elem):
      return False
  return True
def any(seq, pred=bool):
  """Returns True if pred(x) is True for at least one element in the iterable

  Evaluation stops at the first element for which pred(x) is true.
  An empty iterable yields False.

  """
  if pred is None:
    # Same meaning as in the itertools filters: test the element itself
    pred = bool
  for elem in seq:
    if pred(elem):
      return True
  return False
def UniqueSequence(seq):
  """Return a list holding each element of seq exactly once.

  Only the first occurrence of every element is kept, so the original
  element order is preserved. The elements have to be hashable.

  """
  seen = set()
  unique = []
  for item in seq:
    if item in seen:
      continue
    seen.add(item)
    unique.append(item)
  return unique
def IsValidMac(mac):
  """Predicate to check if a MAC address is valid.

  Checks whether the supplied MAC address is formally correct: exactly
  six groups of two lowercase hexadecimal digits, separated by colons,
  with nothing before or after them. Only the colon separated format
  is accepted.

  Returns:
    True if the address is well-formed, False otherwise

  """
  # Five "xx:" groups plus a final "xx"; \Z (unlike $) refuses a trailing
  # newline, and the explicit last group refuses a trailing colon
  mac_check = re.compile(r"^([0-9a-f]{2}:){5}[0-9a-f]{2}\Z")
  return mac_check.match(mac) is not None
1075 def TestDelay(duration):
1076 """Sleep for a fixed amount of time.
1081 time.sleep(duration)
1085 def Daemonize(logfile, noclose_fds=None):
1086 """Daemonize the current process.
1088 This detaches the current process from the controlling terminal and
1089 runs it in the background as a daemon.
1094 # Default maximum for the number of available file descriptors.
1095 if 'SC_OPEN_MAX' in os.sysconf_names:
1097 MAXFD = os.sysconf('SC_OPEN_MAX')
1107 if (pid == 0): # The first child.
1110 pid = os.fork() # Fork a second child.
1111 if (pid == 0): # The second child.
1115 # exit() or _exit()? See below.
1116 os._exit(0) # Exit parent (the first child) of the second child.
1118 os._exit(0) # Exit parent of the first child.
1119 maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
1120 if (maxfd == resource.RLIM_INFINITY):
1123 # Iterate through and close all file descriptors.
1124 for fd in range(0, maxfd):
1125 if noclose_fds and fd in noclose_fds:
1129 except OSError: # ERROR, fd wasn't open to begin with (ignored)
1131 os.open(logfile, os.O_RDWR|os.O_CREAT|os.O_APPEND, 0600)
1132 # Duplicate standard input to standard output and standard error.
1133 os.dup2(0, 1) # standard output (1)
1134 os.dup2(0, 2) # standard error (2)
def FindFile(name, search_path, test=os.path.exists):
  """Look for a filesystem object in a given path.

  The directories of search_path are tried in order; for each one the
  directory and the name are joined with os.path.sep and the result is
  handed to the test function. The first path accepted by the test is
  returned.

  Args:
    - name: the name to look for
    - search_path: list of directory names
    - test: the test which the full path must satisfy
      (defaults to os.path.exists)

  Returns:
    - full path to the item if found
    - None if no directory holds a matching item

  """
  # Generator: paths are built lazily, one directory at a time
  candidates = (os.path.sep.join([directory, name])
                for directory in search_path)
  for candidate in candidates:
    if test(candidate):
      return candidate
  return None
def CheckVolumeGroupSize(vglist, vgname, minsize):
  """Check that a volume group exists and is large enough.

  Args:
    vglist: dictionary mapping volume group names to their size
    vgname: the volume group to look up
    minsize: the smallest acceptable size

  Returns:
    None if the volume group is present and not smaller than minsize,
    otherwise a string holding the error message

  """
  vgsize = vglist.get(vgname)

  if vgsize is None:
    return "volume group '%s' missing" % vgname

  if vgsize < minsize:
    details = (vgname, minsize, vgsize)
    return ("volume group '%s' too small (%s MiB required, %d MiB found)" %
            details)

  return None