4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Ganeti small utilities
43 from cStringIO import StringIO
45 from ganeti import logger
46 from ganeti import errors
47 from ganeti import constants
# Matches strings made up solely of characters that need no shell quoting.
# \Z (rather than $) anchors at the real end of the string: $ would also match
# just before a trailing newline, letting "foo\n" pass as safe to leave bare.
_re_shell_unquoted = re.compile(r'^[-.,=:/_+@A-Za-z0-9]+\Z')
55 class RunResult(object):
56 """Simple class for holding the result of running external programs.
59 exit_code: the exit code of the program, or None (if the program
61 signal: numeric signal that caused the program to finish, or None
62 (if the program wasn't terminated by a signal)
63 stdout: the standard output of the program
64 stderr: the standard error of the program
65 failed: a Boolean value which is True in case the program was
66 terminated by a signal or exited with a non-zero exit code
67 fail_reason: a string detailing the termination reason
70 __slots__ = ["exit_code", "signal", "stdout", "stderr",
71 "failed", "fail_reason", "cmd"]
74 def __init__(self, exit_code, signal, stdout, stderr, cmd):
76 self.exit_code = exit_code
80 self.failed = (signal is not None or exit_code != 0)
82 if self.signal is not None:
83 self.fail_reason = "terminated by signal %s" % self.signal
84 elif self.exit_code is not None:
85 self.fail_reason = "exited with exit code %s" % self.exit_code
87 self.fail_reason = "unable to determine termination reason"
89 if debug and self.failed:
90 logger.Debug("Command '%s' failed (%s); output: %s" %
91 (self.cmd, self.fail_reason, self.output))
94 """Returns the combined stdout and stderr for easier usage.
97 return self.stdout + self.stderr
99 output = property(_GetOutput, None, None, "Return full output")
def _GetLockFile(subsystem):
  """Return the path of the lock file used for the given lock name.

  The file is placed in constants.LOCK_DIR and its name is derived from
  the subsystem name.

  """
  lock_dir = constants.LOCK_DIR
  return "%s/ganeti_lock_%s" % (lock_dir, subsystem)
107 def Lock(name, max_retries=None, debug=False, autoclean=True):
108 """Lock a given subsystem.
110 In case the lock is already held by an alive process, the function
  will sleep indefinitely and poll with a one second interval.
113 When the optional integer argument 'max_retries' is passed with a
114 non-zero value, the function will sleep only for this number of
  times, and then it will raise a LockError if the lock can't be
116 acquired. Passing in a negative number will cause only one try to
117 get the lock. Passing a positive number will make the function retry
118 for approximately that number of seconds.
121 lockfile = _GetLockFile(name)
123 if name in _locksheld:
124 raise errors.LockError('Lock "%s" already held!' % (name,))
132 fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR | os.O_SYNC)
134 except OSError, creat_err:
135 if creat_err.errno != errno.EEXIST:
136 raise errors.LockError("Can't create the lock file. Error '%s'." %
140 pf = open(lockfile, 'r')
141 except IOError, open_err:
144 raise errors.LockError("Lock file exists but cannot be opened."
145 " Error: '%s'." % str(open_err))
152 raise errors.LockError("Invalid pid string in %s" %
155 if not IsProcessAlive(pid):
158 if cleanupcount >= 5:
159 raise errors.LockError, ("Too many stale lock cleanups! Check"
160 " what process is dying.")
161 logger.Error('Stale lockfile %s for pid %d, autocleaned.' %
166 raise errors.LockError("Stale lockfile %s for pid %d?" %
169 if max_retries and max_retries <= retries:
170 raise errors.LockError("Can't acquire lock during the specified"
172 if retries == 5 and (debug or sys.stdin.isatty()):
173 logger.ToStderr("Waiting for '%s' lock from pid %d..." % (name, pid))
179 os.write(fd, '%d\n' % (os.getpid(),))
182 _locksheld.append(name)
186 """Unlock a given subsystem.
189 lockfile = _GetLockFile(name)
192 fd = os.open(lockfile, os.O_RDONLY)
194 raise errors.LockError('Lock "%s" not held.' % (name,))
196 f = os.fdopen(fd, 'r')
202 raise errors.LockError('Unable to determine PID of locking process.')
204 if pid != os.getpid():
205 raise errors.LockError('Lock not held by me (%d != %d)' %
209 _locksheld.remove(name)
216 for lock in _locksheld:
221 """Execute a (shell) command.
223 The command should not read from its standard input, as it will be
227 cmd: command to run. (str)
229 Returns: `RunResult` instance
232 if isinstance(cmd, list):
233 cmd = [str(val) for val in cmd]
234 strcmd = " ".join(cmd)
239 env = os.environ.copy()
241 poller = select.poll()
242 child = subprocess.Popen(cmd, shell=shell,
243 stderr=subprocess.PIPE,
244 stdout=subprocess.PIPE,
245 stdin=subprocess.PIPE,
246 close_fds=True, env=env)
249 poller.register(child.stdout, select.POLLIN)
250 poller.register(child.stderr, select.POLLIN)
254 child.stdout.fileno(): (out, child.stdout),
255 child.stderr.fileno(): (err, child.stderr),
258 status = fcntl.fcntl(fd, fcntl.F_GETFL)
259 fcntl.fcntl(fd, fcntl.F_SETFL, status | os.O_NONBLOCK)
262 for fd, event in poller.poll():
263 if event & select.POLLIN or event & select.POLLPRI:
264 data = fdmap[fd][1].read()
265 # no data from read signifies EOF (the same as POLLHUP)
267 poller.unregister(fd)
270 fdmap[fd][0].write(data)
271 if (event & select.POLLNVAL or event & select.POLLHUP or
272 event & select.POLLERR):
273 poller.unregister(fd)
279 status = child.wait()
287 return RunResult(exitcode, signal, out, err, strcmd)
290 def RunCmdUnlocked(cmd):
291 """Execute a shell command without the 'cmd' lock.
293 This variant of `RunCmd()` drops the 'cmd' lock before running the
  command and re-acquires it afterwards, thus it can be used to call
295 other ganeti commands.
297 The argument and return values are the same as for the `RunCmd()`
301 cmd - command to run. (str)
314 def RemoveFile(filename):
315 """Remove a file ignoring some errors.
317 Remove a file, ignoring non-existing ones or directories. Other
324 if err.errno not in (errno.ENOENT, errno.EISDIR):
328 def _FingerprintFile(filename):
329 """Compute the fingerprint of a file.
331 If the file does not exist, a None will be returned
335 filename - Filename (str)
338 if not (os.path.exists(filename) and os.path.isfile(filename)):
351 return fp.hexdigest()
354 def FingerprintFiles(files):
355 """Compute fingerprints for a list of files.
358 files - array of filenames. ( [str, ...] )
361 dictionary of filename: fingerprint for the files that exist
366 for filename in files:
367 cksum = _FingerprintFile(filename)
369 ret[filename] = cksum
374 def CheckDict(target, template, logname=None):
375 """Ensure a dictionary has a required set of keys.
377 For the given dictionaries `target` and `template`, ensure target
378 has all the keys from template. Missing keys are added with values
382 target - the dictionary to check
383 template - template dictionary
384 logname - a caller-chosen string to identify the debug log
385 entry; if None, no logging will be done
395 target[k] = template[k]
397 if missing and logname:
398 logger.Debug('%s missing keys %s' %
399 (logname, ', '.join(missing)))
402 def IsProcessAlive(pid):
403 """Check if a given pid exists on the system.
405 Returns: true or false, depending on if the pid exists or not
407 Remarks: zombie processes treated as not alive
411 f = open("/proc/%d/status" % pid)
413 if err.errno in (errno.ENOENT, errno.ENOTDIR):
420 state = data[1].split()
421 if len(state) > 1 and state[1] == "Z":
def MatchNameComponent(key, name_list):
  """Try to match a name against a list.

  This function will try to match a name like test1 against a list
  like ['test1.example.com', 'test2.example.com', ...]. Against this
  list, 'test1' as well as 'test1.example' will match, but not
  'test1.ex'. A multiple match will be considered as no match at all
  (e.g. 'test1' against ['test1.example.com', 'test1.example.org']).

  A key that appears verbatim in the list is returned directly, even if
  it is also a dotted prefix of other entries.

  Args:
    key: the name to be searched
    name_list: the list of strings against which to search the key

  Returns:
    None if there is no match *or* if there are multiple matches
    otherwise the element from the list which matches

  """
  # An exact hit is never ambiguous, so it must not be discarded just
  # because longer names starting with "<key>." are present as well.
  if key in name_list:
    return key

  prefix_re = re.compile(r"^%s(\..*)?$" % re.escape(key))
  candidates = [name for name in name_list
                if prefix_re.match(name) is not None]
  if len(candidates) == 1:
    return candidates[0]
  return None
455 """Class implementing resolver and hostname functionality
458 def __init__(self, name=None):
459 """Initialize the host name object.
461 If the name argument is not passed, it will use this system's
466 name = self.SysName()
469 self.name, self.aliases, self.ipaddrs = self.LookupHostname(name)
470 self.ip = self.ipaddrs[0]
473 """Returns the hostname without domain.
476 return self.name.split('.')[0]
480 """Return the current system's name.
482 This is simply a wrapper over socket.gethostname()
485 return socket.gethostname()
488 def LookupHostname(hostname):
492 hostname: hostname to look up
495 a tuple (name, aliases, ipaddrs) as returned by socket.gethostbyname_ex
496 in case of errors in resolving, we raise a ResolverError
500 result = socket.gethostbyname_ex(hostname)
501 except socket.gaierror, err:
502 # hostname not found in DNS
503 raise errors.ResolverError(hostname, err.args[0], err.args[1])
508 def ListVolumeGroups():
509 """List volume groups and their size
512 Dictionary with keys volume name and values the size of the volume
515 command = "vgs --noheadings --units m --nosuffix -o name,size"
516 result = RunCmd(command)
521 for line in result.stdout.splitlines():
523 name, size = line.split()
524 size = int(float(size))
525 except (IndexError, ValueError), err:
526 logger.Error("Invalid output from vgs (%s): %s" % (err, line))
def BridgeExists(bridge):
  """Tell whether a bridge with the given name is present on this system.

  The check looks for a 'bridge' directory under the interface's entry
  in /sys/class/net.

  Returns:
    True if that directory exists, False otherwise.

  """
  bridge_dir = "/sys/class/net/%s/bridge" % bridge
  return os.path.isdir(bridge_dir)
def NiceSort(name_list):
  """Sort a list of strings based on digit and non-digit groupings.

  Given a list of names ['a1', 'a10', 'a11', 'a2'] this function will
  sort the list in the logical order ['a1', 'a2', 'a10', 'a11'].

  The sort algorithm breaks each name in groups of either only-digits
  or no-digits. Only the first eight such groups are considered, and
  after that we just use what's left of the string.

  Groups are ranked explicitly (missing group < number < text) instead
  of comparing None, integers and strings with each other, so the
  result does not depend on how the interpreter orders mixed types. A
  name without any group (the empty string) sorts before all others.

  Args:
    name_list: the strings to sort; the passed list is not modified

  Returns:
    - a copy of the list sorted according to our algorithm

  """
  _SORTER_BASE = r"(\D+|\d+)"
  # One mandatory group followed by up to seven optional ones; whatever
  # follows the eighth group only takes part through the full-name tie-break.
  _SORTER_RE = re.compile("^%s%s" % (_SORTER_BASE,
                                     ("%s?" % _SORTER_BASE) * 7))
  _SORTER_DIGIT = re.compile(r"^\d")

  def _GroupKey(grp):
    """Turns one regex group into a tuple with a well-defined ordering."""
    if grp is None:
      return (0,)
    # A group is either all digits or has none, so the first char decides
    if _SORTER_DIGIT.match(grp):
      return (1, int(grp))
    return (2, grp)

  def _SortKey(name):
    """Builds the (group keys, name) sort key for a single name."""
    match = _SORTER_RE.match(name)
    if match is None:
      # only the empty string has no leading group at all
      groups = ()
    else:
      groups = match.groups()
    return ([_GroupKey(grp) for grp in groups], name)

  return sorted(name_list, key=_SortKey)
578 def CheckDaemonAlive(pid_file, process_string):
579 """Check wether the specified daemon is alive.
582 - pid_file: file to read the daemon pid from, the file is
583 expected to contain only a single line containing
585 - process_string: a substring that we expect to find in
586 the command line of the daemon process
589 - True if the daemon is judged to be alive (that is:
590 - the PID file exists, is readable and contains a number
591 - a process of the specified PID is running
592 - that process contains the specified string in its
594 - the process is not in state Z (zombie))
599 pid_file = file(pid_file, 'r')
601 pid = int(pid_file.readline())
605 cmdline_file_path = "/proc/%s/cmdline" % (pid)
606 cmdline_file = open(cmdline_file_path, 'r')
608 cmdline = cmdline_file.readline()
612 if not process_string in cmdline:
615 stat_file_path = "/proc/%s/stat" % (pid)
616 stat_file = open(stat_file_path, 'r')
618 process_state = stat_file.readline().split()[2]
622 if process_state == 'Z':
625 except (IndexError, IOError, ValueError):
def TryConvert(fn, val):
  """Apply a conversion function, falling back to the input on failure.

  The function `fn` is called with `val` as its only argument. When the
  call succeeds its result is returned; when it raises ValueError or
  TypeError the original `val` is returned unchanged. Any other
  exception is propagated to the caller.

  """
  try:
    return fn(val)
  except (ValueError, TypeError):
    return val
648 """Verifies the syntax of an IP address.
650 This function checks if the ip address passes is valid or not based
651 on syntax (not ip range, class calculations or anything).
654 unit = "(0|[1-9]\d{0,2})"
655 return re.match("^%s\.%s\.%s\.%s$" % (unit, unit, unit, unit), ip)
def IsValidShellParam(word):
  """Verifies if the given word is safe from the shell's p.o.v.

  This means that we can pass this to a command via the shell and be
  sure that it doesn't alter the command line and is passed as such to
  the actual command.

  Note that we are overly restrictive here, in order to be on the safe
  side.

  Args:
    word: the string to check

  Returns:
    True if the word is non-empty and consists only of the characters
    considered safe, False otherwise

  """
  # \Z instead of $: $ also matches right before a trailing newline, which
  # would let a word such as "foo\n" through.
  return bool(re.match(r"^[-a-zA-Z0-9._+/:%@]+\Z", word))
def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  Every argument is checked with IsValidShellParam, i.e. it must not
  contain shell metacharacters. The first argument failing the check
  causes a ProgrammerError to be raised; if all of them pass, the result
  of template % args is returned.

  """
  for candidate in args:
    if IsValidShellParam(candidate):
      continue
    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % candidate)
  return template % args
def FormatUnit(value):
  """Render an amount given in MiB using the best fitting unit.

  The value must be numeric. Amounts below 1024 are shown as whole MiB
  ("M"), amounts below 1024 * 1024 as GiB ("G") and everything else as
  TiB ("T"), the latter two with one decimal. The result is always a
  string.

  """
  if value < 1024:
    return "%dM" % round(value, 0)

  if value < (1024 * 1024):
    gib = float(value) / 1024
    return "%0.1fG" % round(gib, 1)

  tib = float(value) / 1024 / 1024
  return "%0.1fT" % round(tib, 1)
704 def ParseUnit(input_string):
705 """Tries to extract number and scale from the given string.
707 Input must be in the format NUMBER+ [DOT NUMBER+] SPACE* [UNIT]. If no unit
708 is specified, it defaults to MiB. Return value is always an int in MiB.
711 m = re.match('^([.\d]+)\s*([a-zA-Z]+)?$', input_string)
713 raise errors.UnitParseError("Invalid format")
715 value = float(m.groups()[0])
719 lcunit = unit.lower()
723 if lcunit in ('m', 'mb', 'mib'):
724 # Value already in MiB
727 elif lcunit in ('g', 'gb', 'gib'):
730 elif lcunit in ('t', 'tb', 'tib'):
734 raise errors.UnitParseError("Unknown unit: %s" % unit)
736 # Make sure we round up
737 if int(value) < value:
740 # Round up to the next multiple of 4
743 value += 4 - value % 4
748 def AddAuthorizedKey(file_name, key):
749 """Adds an SSH public key to an authorized_keys file.
752 file_name: Path to authorized_keys file
753 key: String containing key
755 key_fields = key.split()
757 f = open(file_name, 'a+')
761 # Ignore whitespace changes
762 if line.split() == key_fields:
764 nl = line.endswith('\n')
768 f.write(key.rstrip('\r\n'))
775 def RemoveAuthorizedKey(file_name, key):
776 """Removes an SSH public key from an authorized_keys file.
779 file_name: Path to authorized_keys file
780 key: String containing key
782 key_fields = key.split()
784 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
786 out = os.fdopen(fd, 'w')
788 f = open(file_name, 'r')
791 # Ignore whitespace changes while comparing lines
792 if line.split() != key_fields:
796 os.rename(tmpname, file_name)
806 def SetEtcHostsEntry(file_name, ip, hostname, aliases):
807 """Sets the name of an IP address and hostname in /etc/hosts.
810 # Ensure aliases are unique
811 aliases = UniqueSequence([hostname] + aliases)[1:]
813 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
815 out = os.fdopen(fd, 'w')
817 f = open(file_name, 'r')
821 fields = line.split()
822 if fields and not fields[0].startswith('#') and ip == fields[0]:
826 out.write("%s\t%s" % (ip, hostname))
828 out.write(" %s" % ' '.join(aliases))
833 os.rename(tmpname, file_name)
843 def RemoveEtcHostsEntry(file_name, hostname):
844 """Removes a hostname from /etc/hosts.
846 IP addresses without names are removed from the file.
848 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
850 out = os.fdopen(fd, 'w')
852 f = open(file_name, 'r')
855 fields = line.split()
856 if len(fields) > 1 and not fields[0].startswith('#'):
858 if hostname in names:
859 while hostname in names:
860 names.remove(hostname)
862 out.write("%s %s\n" % (fields[0], ' '.join(names)))
869 os.rename(tmpname, file_name)
879 def CreateBackup(file_name):
880 """Creates a backup of a file.
882 Returns: the path to the newly created backup file.
885 if not os.path.isfile(file_name):
886 raise errors.ProgrammerError("Can't make a backup of a non-file '%s'" %
889 prefix = '%s.backup-%d.' % (os.path.basename(file_name), int(time.time()))
890 dir_name = os.path.dirname(file_name)
892 fsrc = open(file_name, 'rb')
894 (fd, backup_name) = tempfile.mkstemp(prefix=prefix, dir=dir_name)
895 fdst = os.fdopen(fd, 'wb')
897 shutil.copyfileobj(fsrc, fdst)
def ShellQuote(value):
  """Quote a single shell argument following the POSIX rules.

  Values matching _re_shell_unquoted are returned as they are. Anything
  else is wrapped in single quotes, with each embedded single quote
  replaced by the sequence '\\''.

  """
  if _re_shell_unquoted.match(value) is not None:
    return value

  escaped = value.replace("'", "'\\''")
  return "'%s'" % escaped
def ShellQuoteArgs(args):
  """Quote each argument with ShellQuote and join them with spaces.

  """
  quoted = map(ShellQuote, args)
  return ' '.join(quoted)
923 def TcpPing(target, port, timeout=10, live_port_needed=False, source=None):
924 """Simple ping implementation using TCP connect(2).
926 Try to do a TCP connect(2) from an optional source IP to the
927 specified target IP and the specified target port. If the optional
928 parameter live_port_needed is set to true, requires the remote end
929 to accept the connection. The timeout is specified in seconds and
930 defaults to 10 seconds. If the source optional argument is not
931 passed, the source address selection is left to the kernel,
932 otherwise we try to connect using the passed address (failures to
933 bind other than EADDRNOTAVAIL will be ignored).
936 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
940 if source is not None:
942 sock.bind((source, 0))
943 except socket.error, (errcode, errstring):
944 if errcode == errno.EADDRNOTAVAIL:
947 sock.settimeout(timeout)
950 sock.connect((target, port))
953 except socket.timeout:
955 except socket.error, (errcode, errstring):
956 success = (not live_port_needed) and (errcode == errno.ECONNREFUSED)
961 def ListVisibleFiles(path):
962 """Returns a list of all visible files in a directory.
965 files = [i for i in os.listdir(path) if not i.startswith(".")]
970 def GetHomeDir(user, default=None):
971 """Try to get the homedir of the given user.
973 The user can be passed either as a string (denoting the name) or as
974 an integer (denoting the user id). If the user is not found, the
975 'default' argument is returned, which defaults to None.
979 if isinstance(user, basestring):
980 result = pwd.getpwnam(user)
981 elif isinstance(user, (int, long)):
982 result = pwd.getpwuid(user)
984 raise errors.ProgrammerError("Invalid type passed to GetHomeDir (%s)" %
992 """Returns a random UUID.
995 f = open("/proc/sys/kernel/random/uuid", "r")
997 return f.read(128).rstrip("\n")
1002 def WriteFile(file_name, fn=None, data=None,
1003 mode=None, uid=-1, gid=-1,
1004 atime=None, mtime=None):
1005 """(Over)write a file atomically.
1007 The file_name and either fn (a function taking one argument, the
1008 file descriptor, and which should write the data to it) or data (the
1009 contents of the file) must be passed. The other arguments are
1010 optional and allow setting the file mode, owner and group, and the
1011 mtime/atime of the file.
1013 If the function doesn't raise an exception, it has succeeded and the
1014 target file has the new contents. If the file has raised an
1015 exception, an existing target file should be unmodified and the
1016 temporary file should be removed.
1019 if not os.path.isabs(file_name):
1020 raise errors.ProgrammerError("Path passed to WriteFile is not"
1021 " absolute: '%s'" % file_name)
1023 if [fn, data].count(None) != 1:
1024 raise errors.ProgrammerError("fn or data required")
1026 if [atime, mtime].count(None) == 1:
1027 raise errors.ProgrammerError("Both atime and mtime must be either"
1031 dir_name, base_name = os.path.split(file_name)
1032 fd, new_name = tempfile.mkstemp('.new', base_name, dir_name)
1033 # here we need to make sure we remove the temp file, if any error
1034 # leaves it in place
1036 if uid != -1 or gid != -1:
1037 os.chown(new_name, uid, gid)
1039 os.chmod(new_name, mode)
1040 if data is not None:
1045 if atime is not None and mtime is not None:
1046 os.utime(new_name, (atime, mtime))
1047 os.rename(new_name, file_name)
1050 RemoveFile(new_name)
def all(seq, pred=bool):
  "Returns True if pred(x) is True for every element in the iterable"
  # A None predicate means plain truth testing of the elements
  if pred is None:
    pred = bool
  for elem in seq:
    if not pred(elem):
      # stop at the first element failing the predicate
      return False
  return True
def any(seq, pred=bool):
  "Returns True if pred(x) is True for at least one element in the iterable"
  # A None predicate means plain truth testing of the elements
  if pred is None:
    pred = bool
  for elem in seq:
    if pred(elem):
      # stop at the first element satisfying the predicate
      return True
  return False
def UniqueSequence(seq):
  """Return the elements of a sequence with duplicates dropped.

  Only the first occurrence of each element is kept, and the kept
  elements appear in their original order. The result is a new list.

  """
  seen = set()
  result = []
  for item in seq:
    if item in seen:
      continue
    seen.add(item)
    result.append(item)
  return result
def IsValidMac(mac):
  """Predicate to check if a MAC address is valid.

  Checks whether the supplied MAC address is formally correct, only
  accepts colon separated format: exactly six groups of two lowercase
  hexadecimal digits, with a colon between (not after) the groups.

  Args:
    mac: the string to check

  Returns:
    True if the string has the expected format, False otherwise

  """
  # The separator is required only between groups, and \Z anchors at the
  # real end of the string, so neither a trailing colon nor a trailing
  # newline is accepted.
  mac_check = re.compile(r"^[0-9a-f]{2}(:[0-9a-f]{2}){5}\Z")
  return mac_check.match(mac) is not None
1086 def TestDelay(duration):
1087 """Sleep for a fixed amount of time.
1092 time.sleep(duration)
1096 def Daemonize(logfile):
1097 """Daemonize the current process.
1099 This detaches the current process from the controlling terminal and
1100 runs it in the background as a daemon.
1105 # Default maximum for the number of available file descriptors.
1106 if 'SC_OPEN_MAX' in os.sysconf_names:
1108 MAXFD = os.sysconf('SC_OPEN_MAX')
1118 if (pid == 0): # The first child.
1121 pid = os.fork() # Fork a second child.
1122 if (pid == 0): # The second child.
1126 # exit() or _exit()? See below.
1127 os._exit(0) # Exit parent (the first child) of the second child.
1129 os._exit(0) # Exit parent of the first child.
1130 maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
1131 if (maxfd == resource.RLIM_INFINITY):
1134 # Iterate through and close all file descriptors.
1135 for fd in range(0, maxfd):
1138 except OSError: # ERROR, fd wasn't open to begin with (ignored)
1140 os.open(logfile, os.O_RDWR|os.O_CREAT|os.O_APPEND, 0600)
1141 # Duplicate standard input to standard output and standard error.
1142 os.dup2(0, 1) # standard output (1)
1143 os.dup2(0, 2) # standard error (2)
1147 def FindFile(name, search_path, test=os.path.exists):
1148 """Look for a filesystem object in a given path.
1150 This is an abstract method to search for filesystem object (files,
1151 dirs) under a given search path.
1154 - name: the name to look for
1155 - search_path: list of directory names
1156 - test: the test which the full path must satisfy
1157 (defaults to os.path.exists)
1160 - full path to the item if found
1164 for dir_name in search_path:
1165 item_name = os.path.sep.join([dir_name, name])