4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Ganeti small utilities
42 from cStringIO import StringIO
44 from ganeti import logger
45 from ganeti import errors
46 from ganeti import constants
# Matches arguments made up only of characters that are passed through
# without quoting. "\Z" anchors at the very end of the string; "$" would
# also match just before a trailing newline and so accept "foo\n".
_re_shell_unquoted = re.compile(r'^[-.,=:/_+@A-Za-z0-9]+\Z')
class RunResult(object):
  """Container describing how a run of an external program ended.

  Instance variables:
    exit_code: the exit code of the program, or None when no exit code
               is available
    signal: numeric signal that caused the program to finish, or None
            (if the program wasn't terminated by a signal)
    stdout: the standard output of the program
    stderr: the standard error of the program
    failed: a Boolean value which is True in case the program was
            terminated by a signal or exited with a non-zero exit code
    fail_reason: a string detailing the termination reason
    cmd: the command, as given to the constructor; used in the debug
         message logged for failed runs
    output: read-only property, stdout followed by stderr

  """
  __slots__ = ["exit_code", "signal", "stdout", "stderr",
               "failed", "fail_reason", "cmd"]

  def __init__(self, exit_code, signal, stdout, stderr, cmd):
    """Store the raw results and derive failed/fail_reason from them."""
    self.cmd = cmd
    self.stdout = stdout
    self.stderr = stderr
    self.signal = signal
    self.exit_code = exit_code

    # Only a run with no signal and a zero exit code counts as a success
    self.failed = not (signal is None and exit_code == 0)

    # A signal takes precedence over the exit code in the description
    if signal is not None:
      reason = "terminated by signal %s" % signal
    elif exit_code is not None:
      reason = "exited with exit code %s" % exit_code
    else:
      reason = "unable to determine termination reason"
    self.fail_reason = reason

    if self.failed and debug:
      logger.Debug("Command '%s' failed (%s); output: %s" %
                   (self.cmd, self.fail_reason, self.output))

  def _GetOutput(self):
    """Returns stdout and stderr concatenated, in that order.

    """
    return self.stdout + self.stderr

  output = property(_GetOutput, None, None, "Return full output")
def _GetLockFile(subsystem):
  """Return the path of the lock file used for the given lock name."""
  lock_dir = constants.LOCK_DIR
  return "%s/ganeti_lock_%s" % (lock_dir, subsystem)
106 def Lock(name, max_retries=None, debug=False, autoclean=True):
107 """Lock a given subsystem.
109 In case the lock is already held by an alive process, the function
110 will sleep indefinitely and poll with a one second interval.
112 When the optional integer argument 'max_retries' is passed with a
113 non-zero value, the function will sleep only for this number of
114 times, and then it will raise a LockError if the lock can't be
115 acquired. Passing in a negative number will cause only one try to
116 get the lock. Passing a positive number will make the function retry
117 for approximately that number of seconds.
120 lockfile = _GetLockFile(name)
122 if name in _locksheld:
123 raise errors.LockError('Lock "%s" already held!' % (name,))
131 fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR | os.O_SYNC)
133 except OSError, creat_err:
134 if creat_err.errno != errno.EEXIST:
135 raise errors.LockError("Can't create the lock file. Error '%s'." %
139 pf = open(lockfile, 'r')
140 except IOError, open_err:
143 raise errors.LockError("Lock file exists but cannot be opened."
144 " Error: '%s'." % str(open_err))
151 raise errors.LockError("Invalid pid string in %s" %
154 if not IsProcessAlive(pid):
157 if cleanupcount >= 5:
158 raise errors.LockError, ("Too many stale lock cleanups! Check"
159 " what process is dying.")
160 logger.Error('Stale lockfile %s for pid %d?' % (lockfile, pid))
164 raise errors.LockError("Stale lockfile %s for pid %d?" %
167 if max_retries and max_retries <= retries:
168 raise errors.LockError("Can't acquire lock during the specified"
170 if retries == 5 and (debug or sys.stdin.isatty()):
171 logger.ToStderr("Waiting for '%s' lock from pid %d..." % (name, pid))
177 os.write(fd, '%d\n' % (os.getpid(),))
180 _locksheld.append(name)
184 """Unlock a given subsystem.
187 lockfile = _GetLockFile(name)
190 fd = os.open(lockfile, os.O_RDONLY)
192 raise errors.LockError('Lock "%s" not held.' % (name,))
194 f = os.fdopen(fd, 'r')
200 raise errors.LockError('Unable to determine PID of locking process.')
202 if pid != os.getpid():
203 raise errors.LockError('Lock not held by me (%d != %d)' %
207 _locksheld.remove(name)
214 for lock in _locksheld:
219 """Execute a (shell) command.
221 The command should not read from its standard input, as it will be
225 cmd: command to run. (str)
227 Returns: `RunResult` instance
230 if isinstance(cmd, list):
231 cmd = [str(val) for val in cmd]
232 strcmd = " ".join(cmd)
237 env = os.environ.copy()
239 poller = select.poll()
240 child = subprocess.Popen(cmd, shell=shell,
241 stderr=subprocess.PIPE,
242 stdout=subprocess.PIPE,
243 stdin=subprocess.PIPE,
244 close_fds=True, env=env)
247 poller.register(child.stdout, select.POLLIN)
248 poller.register(child.stderr, select.POLLIN)
252 child.stdout.fileno(): (out, child.stdout),
253 child.stderr.fileno(): (err, child.stderr),
256 status = fcntl.fcntl(fd, fcntl.F_GETFL)
257 fcntl.fcntl(fd, fcntl.F_SETFL, status | os.O_NONBLOCK)
260 for fd, event in poller.poll():
261 if event & select.POLLIN or event & select.POLLPRI:
262 data = fdmap[fd][1].read()
263 # no data from read signifies EOF (the same as POLLHUP)
265 poller.unregister(fd)
268 fdmap[fd][0].write(data)
269 if (event & select.POLLNVAL or event & select.POLLHUP or
270 event & select.POLLERR):
271 poller.unregister(fd)
277 status = child.wait()
285 return RunResult(exitcode, signal, out, err, strcmd)
288 def RunCmdUnlocked(cmd):
289 """Execute a shell command without the 'cmd' lock.
291 This variant of `RunCmd()` drops the 'cmd' lock before running the
292 command and re-acquires it afterwards, thus it can be used to call
293 other ganeti commands.
295 The argument and return values are the same as for the `RunCmd()`
299 cmd - command to run. (str)
312 def RemoveFile(filename):
313 """Remove a file ignoring some errors.
315 Remove a file, ignoring non-existing ones or directories. Other
322 if err.errno not in (errno.ENOENT, errno.EISDIR):
326 def _FingerprintFile(filename):
327 """Compute the fingerprint of a file.
329 If the file does not exist, a None will be returned
333 filename - Filename (str)
336 if not (os.path.exists(filename) and os.path.isfile(filename)):
349 return fp.hexdigest()
def FingerprintFiles(files):
  """Fingerprint every file in a list.

  Args:
    files - array of filenames.  ( [str, ...] )

  Return value:
    dictionary of filename: fingerprint for the files that exist

  """
  # Names for which no fingerprint is obtained are left out of the result
  checksums = [(name, _FingerprintFile(name)) for name in files]
  return dict([(name, cksum) for (name, cksum) in checksums if cksum])
def CheckDict(target, template, logname=None):
  """Make sure a dictionary contains all keys of a template.

  Every key of `template` that is absent from `target` is copied into
  `target` together with the template's value; keys already present in
  `target` are left untouched.

  Args:
    target   - the dictionary to check (modified in place)
    template - template dictionary
    logname  - a caller-chosen string to identify the debug log
               entry; if None, no logging will be done

  Returns value:
    None

  """
  missing = [key for key in template if key not in target]
  for key in missing:
    target[key] = template[key]

  if not (missing and logname):
    return
  logger.Debug('%s missing keys %s' %
               (logname, ', '.join(missing)))
400 def IsProcessAlive(pid):
401 """Check if a given pid exists on the system.
403 Returns: true or false, depending on if the pid exists or not
405 Remarks: zombie processes treated as not alive
409 f = open("/proc/%d/status" % pid)
411 if err.errno in (errno.ENOENT, errno.ENOTDIR):
418 state = data[1].split()
419 if len(state) > 1 and state[1] == "Z":
def MatchNameComponent(key, name_list):
  """Try to match a name against a list.

  This function will try to match a name like test1 against a list
  like ['test1.example.com', 'test2.example.com', ...]. Against this
  list, 'test1' as well as 'test1.example' will match, but not
  'test1.ex'. A multiple match will be considered as no match at all
  (e.g. 'test1' against ['test1.example.com', 'test1.example.org']).

  Args:
    key: the name to be searched
    name_list: the list of strings against which to search the key

  Returns:
    None if there is no match *or* if there are multiple matches
    otherwise the element from the list which matches

  """
  # The key must be followed by a dot-separated remainder or by nothing at
  # all, so that only whole name components can match.
  pattern = re.compile(r"^%s(\..*)?$" % re.escape(key))
  candidates = [name for name in name_list if pattern.match(name) is not None]
  if len(candidates) != 1:
    return None
  return candidates[0]
453 """Class implementing resolver and hostname functionality
456 def __init__(self, name=None):
457 """Initialize the host name object.
459 If the name argument is not passed, it will use this system's
464 name = self.SysName()
467 self.name, self.aliases, self.ipaddrs = self.LookupHostname(name)
468 self.ip = self.ipaddrs[0]
471 """Returns the hostname without domain.
474 return self.name.split('.')[0]
478 """Return the current system's name.
480 This is simply a wrapper over socket.gethostname()
483 return socket.gethostname()
486 def LookupHostname(hostname):
490 hostname: hostname to look up
493 a tuple (name, aliases, ipaddrs) as returned by socket.gethostbyname_ex
494 in case of errors in resolving, we raise a ResolverError
498 result = socket.gethostbyname_ex(hostname)
499 except socket.gaierror, err:
500 # hostname not found in DNS
501 raise errors.ResolverError(hostname, err.args[0], err.args[1])
506 def ListVolumeGroups():
507 """List volume groups and their size
510 Dictionary with keys volume name and values the size of the volume
513 command = "vgs --noheadings --units m --nosuffix -o name,size"
514 result = RunCmd(command)
519 for line in result.stdout.splitlines():
521 name, size = line.split()
522 size = int(float(size))
523 except (IndexError, ValueError), err:
524 logger.Error("Invalid output from vgs (%s): %s" % (err, line))
def BridgeExists(bridge):
  """Tell whether a bridge of the given name is present on this system.

  The check looks for the 'bridge' directory of the interface under
  /sys/class/net.

  Returns:
     True if it does, false otherwise.

  """
  bridge_dir = "/sys/class/net/%s/bridge" % bridge
  return os.path.isdir(bridge_dir)
def NiceSort(name_list):
  """Sort a list of strings based on digit and non-digit groupings.

  Given a list of names ['a1', 'a10', 'a11', 'a2'] this function will
  sort the list in the logical order ['a1', 'a2', 'a10', 'a11'].

  The sort algorithm breaks each name in groups of either only-digits
  or no-digits. Only the first eight such groups are considered; names
  which agree on all of them are ordered by plain comparison of the
  whole name. Digit groups compare numerically and sort before
  non-digit groups, and a name which runs out of groups sorts before a
  longer name sharing the same leading groups. The empty string is
  accepted and sorts first.

  Return value
    - a copy of the list sorted according to our algorithm

  """
  max_groups = 8
  # Every group is optional and the pattern is not anchored at the end, so
  # any string (the empty one included) matches.
  group_re = re.compile(r"^" + r"(\D+|\d+)?" * max_groups)
  nodigit_re = re.compile(r"^\D*$")

  def _GroupKey(grp):
    """Map a group to a key comparable with the key of any other group.

    The leading tag spells out the "missing < number < text" order instead
    of relying on how the interpreter compares values of different types.

    """
    if grp is None:
      return (0,)
    if nodigit_re.match(grp):
      return (2, grp)
    return (1, int(grp))

  decorated = []
  for name in name_list:
    groups = group_re.match(name).groups()
    decorated.append(([_GroupKey(grp) for grp in groups], name))
  decorated.sort()
  return [name for (_, name) in decorated]
576 def CheckDaemonAlive(pid_file, process_string):
577 """Check whether the specified daemon is alive.
580 - pid_file: file to read the daemon pid from, the file is
581 expected to contain only a single line containing
583 - process_string: a substring that we expect to find in
584 the command line of the daemon process
587 - True if the daemon is judged to be alive (that is:
588 - the PID file exists, is readable and contains a number
589 - a process of the specified PID is running
590 - that process contains the specified string in its
592 - the process is not in state Z (zombie))
597 pid_file = file(pid_file, 'r')
599 pid = int(pid_file.readline())
603 cmdline_file_path = "/proc/%s/cmdline" % (pid)
604 cmdline_file = open(cmdline_file_path, 'r')
606 cmdline = cmdline_file.readline()
610 if not process_string in cmdline:
613 stat_file_path = "/proc/%s/stat" % (pid)
614 stat_file = open(stat_file_path, 'r')
616 process_state = stat_file.readline().split()[2]
620 if process_state == 'Z':
623 except (IndexError, IOError, ValueError):
def TryConvert(fn, val):
  """Apply a conversion function, falling back to the input value.

  `fn` is called with `val` as its only argument. When the call raises
  ValueError or TypeError, `val` itself is returned unchanged; in all
  other cases the result of the call is returned. Any other exception
  is propagated to the caller.

  """
  try:
    return fn(val)
  except (ValueError, TypeError):
    return val
646 """Verifies the syntax of an IP address.
648 This function checks if the ip address passes is valid or not based
649 on syntax (not ip range, class calculations or anything).
652 unit = "(0|[1-9]\d{0,2})"
653 return re.match("^%s\.%s\.%s\.%s$" % (unit, unit, unit, unit), ip)
def IsValidShellParam(word):
  """Verifies if the given word is safe from the shell's p.o.v.

  This means that we can pass this to a command via the shell and be
  sure that it doesn't alter the command line and is passed as such to
  the actual command.

  Note that we are overly restrictive here, in order to be on the safe
  side.

  Returns:
    True if the word is non-empty and made up only of the characters
    - a-z A-Z 0-9 . _ + / : % @, False otherwise

  """
  # "\Z" anchors at the very end of the string; "$" would also match just
  # before a trailing newline and so accept a word ending in "\n".
  return bool(re.match(r"^[-a-zA-Z0-9._+/:%@]+\Z", word))
def BuildShellCmd(template, *args):
  """Build a shell command line from a template and checked arguments.

  Each argument is checked, in order, with IsValidShellParam; the first
  one that is rejected causes a ProgrammerError to be raised. If all
  arguments pass, the result of `template % args` is returned.

  """
  for arg in args:
    if IsValidShellParam(arg):
      continue
    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % arg)
  return template % args
def FormatUnit(value):
  """Render a number of MiB using the most suitable unit.

  Values below 1024 are shown as whole mebibytes ("M"), values below
  1024 * 1024 as gibibytes ("G") and everything else as tebibytes ("T"),
  the latter two with one decimal.

  Value needs to be passed as a numeric type. Return value is always a string.

  """
  mib_per_gib = 1024
  mib_per_tib = 1024 * 1024

  if value < mib_per_gib:
    return "%dM" % round(value, 0)

  if value < mib_per_tib:
    in_gib = float(value) / 1024
    return "%0.1fG" % round(in_gib, 1)

  in_tib = float(value) / 1024 / 1024
  return "%0.1fT" % round(in_tib, 1)
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format NUMBER+ [DOT NUMBER+] SPACE* [UNIT]. If no unit
  is specified, it defaults to MiB. Return value is always an int in MiB.

  Recognized units (case-insensitive) are m/mb/mib, g/gb/gib and
  t/tb/tib. The result is rounded up to a whole number of MiB and then
  up to a multiple of 4.

  Raises:
    errors.UnitParseError: if the string doesn't have the expected
      format, the number can't be parsed or the unit is unknown

  """
  m = re.match(r'^([.\d]+)\s*([a-zA-Z]+)?$', input_string)
  if not m:
    raise errors.UnitParseError("Invalid format")

  number, unit = m.groups()
  try:
    value = float(number)
  except ValueError:
    # The pattern above also lets through strings such as "1.2.3" or "."
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = 'm'

  if lcunit in ('m', 'mb', 'mib'):
    # Value already in MiB
    pass
  elif lcunit in ('g', 'gb', 'gib'):
    value *= 1024
  elif lcunit in ('t', 'tb', 'tib'):
    value *= 1024 * 1024
  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  whole = int(value)
  if whole < value:
    whole += 1

  # Round up to a multiple of 4; exact multiples are left alone
  remainder = whole % 4
  if remainder:
    whole += 4 - remainder

  return whole
746 def AddAuthorizedKey(file_name, key):
747 """Adds an SSH public key to an authorized_keys file.
750 file_name: Path to authorized_keys file
751 key: String containing key
753 key_fields = key.split()
755 f = open(file_name, 'a+')
759 # Ignore whitespace changes
760 if line.split() == key_fields:
762 nl = line.endswith('\n')
766 f.write(key.rstrip('\r\n'))
773 def RemoveAuthorizedKey(file_name, key):
774 """Removes an SSH public key from an authorized_keys file.
777 file_name: Path to authorized_keys file
778 key: String containing key
780 key_fields = key.split()
782 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
784 out = os.fdopen(fd, 'w')
786 f = open(file_name, 'r')
789 # Ignore whitespace changes while comparing lines
790 if line.split() != key_fields:
794 os.rename(tmpname, file_name)
804 def SetEtcHostsEntry(file_name, ip, hostname, aliases):
805 """Sets the name of an IP address and hostname in /etc/hosts.
808 # Ensure aliases are unique
809 aliases = UniqueSequence([hostname] + aliases)[1:]
811 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
813 out = os.fdopen(fd, 'w')
815 f = open(file_name, 'r')
819 fields = line.split()
820 if fields and not fields[0].startswith('#') and ip == fields[0]:
824 out.write("%s\t%s" % (ip, hostname))
826 out.write(" %s" % ' '.join(aliases))
831 os.rename(tmpname, file_name)
841 def RemoveEtcHostsEntry(file_name, hostname):
842 """Removes a hostname from /etc/hosts.
844 IP addresses without names are removed from the file.
846 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
848 out = os.fdopen(fd, 'w')
850 f = open(file_name, 'r')
853 fields = line.split()
854 if len(fields) > 1 and not fields[0].startswith('#'):
856 if hostname in names:
857 while hostname in names:
858 names.remove(hostname)
860 out.write("%s %s\n" % (fields[0], ' '.join(names)))
867 os.rename(tmpname, file_name)
877 def CreateBackup(file_name):
878 """Creates a backup of a file.
880 Returns: the path to the newly created backup file.
883 if not os.path.isfile(file_name):
884 raise errors.ProgrammerError("Can't make a backup of a non-file '%s'" %
887 prefix = '%s.backup-%d.' % (os.path.basename(file_name), int(time.time()))
888 dir_name = os.path.dirname(file_name)
890 fsrc = open(file_name, 'rb')
892 (fd, backup_name) = tempfile.mkstemp(prefix=prefix, dir=dir_name)
893 fdst = os.fdopen(fd, 'wb')
895 shutil.copyfileobj(fsrc, fdst)
def ShellQuote(value):
  """Quotes shell argument according to POSIX.

  Values matching _re_shell_unquoted are returned as they are. Anything
  else is wrapped in single quotes, with every embedded single quote
  written as '\\'' (close the quoting, escaped quote, reopen).

  """
  if not _re_shell_unquoted.match(value):
    return "'%s'" % value.replace("'", "'\\''")
  return value
def ShellQuoteArgs(args):
  """Quote each argument with ShellQuote and join them with spaces.

  """
  quoted = []
  for arg in args:
    quoted.append(ShellQuote(arg))
  return ' '.join(quoted)
921 def TcpPing(target, port, timeout=10, live_port_needed=False, source=None):
922 """Simple ping implementation using TCP connect(2).
924 Try to do a TCP connect(2) from an optional source IP to the
925 specified target IP and the specified target port. If the optional
926 parameter live_port_needed is set to true, requires the remote end
927 to accept the connection. The timeout is specified in seconds and
928 defaults to 10 seconds. If the source optional argument is not
929 passed, the source address selection is left to the kernel,
930 otherwise we try to connect using the passed address (failures to
931 bind other than EADDRNOTAVAIL will be ignored).
934 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
938 if source is not None:
940 sock.bind((source, 0))
941 except socket.error, (errcode, errstring):
942 if errcode == errno.EADDRNOTAVAIL:
945 sock.settimeout(timeout)
948 sock.connect((target, port))
951 except socket.timeout:
953 except socket.error, (errcode, errstring):
954 success = (not live_port_needed) and (errcode == errno.ECONNREFUSED)
959 def ListVisibleFiles(path):
960 """Returns a list of all visible files in a directory.
963 files = [i for i in os.listdir(path) if not i.startswith(".")]
968 def GetHomeDir(user, default=None):
969 """Try to get the homedir of the given user.
971 The user can be passed either as a string (denoting the name) or as
972 an integer (denoting the user id). If the user is not found, the
973 'default' argument is returned, which defaults to None.
977 if isinstance(user, basestring):
978 result = pwd.getpwnam(user)
979 elif isinstance(user, (int, long)):
980 result = pwd.getpwuid(user)
982 raise errors.ProgrammerError("Invalid type passed to GetHomeDir (%s)" %
990 """Returns a random UUID.
993 f = open("/proc/sys/kernel/random/uuid", "r")
995 return f.read(128).rstrip("\n")
1000 def WriteFile(file_name, fn=None, data=None,
1001 mode=None, uid=-1, gid=-1,
1002 atime=None, mtime=None):
1003 """(Over)write a file atomically.
1005 The file_name and either fn (a function taking one argument, the
1006 file descriptor, and which should write the data to it) or data (the
1007 contents of the file) must be passed. The other arguments are
1008 optional and allow setting the file mode, owner and group, and the
1009 mtime/atime of the file.
1011 If the function doesn't raise an exception, it has succeeded and the
1012 target file has the new contents. If the file has raised an
1013 exception, an existing target file should be unmodified and the
1014 temporary file should be removed.
1017 if not os.path.isabs(file_name):
1018 raise errors.ProgrammerError("Path passed to WriteFile is not"
1019 " absolute: '%s'" % file_name)
1021 if [fn, data].count(None) != 1:
1022 raise errors.ProgrammerError("fn or data required")
1024 if [atime, mtime].count(None) == 1:
1025 raise errors.ProgrammerError("Both atime and mtime must be either"
1029 dir_name, base_name = os.path.split(file_name)
1030 fd, new_name = tempfile.mkstemp('.new', base_name, dir_name)
1031 # here we need to make sure we remove the temp file, if any error
1032 # leaves it in place
1034 if uid != -1 or gid != -1:
1035 os.chown(new_name, uid, gid)
1037 os.chmod(new_name, mode)
1038 if data is not None:
1043 if atime is not None and mtime is not None:
1044 os.utime(new_name, (atime, mtime))
1045 os.rename(new_name, file_name)
1048 RemoveFile(new_name)
def all(seq, pred=bool):
  "Returns True if pred(x) is True for every element in the iterable"
  # A predicate of None means "use the truth value of the element itself"
  if pred is None:
    pred = bool
  for elem in seq:
    if not pred(elem):
      return False
  return True
def any(seq, pred=bool):
  "Returns True if pred(x) is True for at least one element in the iterable"
  # A predicate of None means "use the truth value of the element itself"
  if pred is None:
    pred = bool
  for elem in seq:
    if pred(elem):
      return True
  return False
def UniqueSequence(seq):
  """Return the elements of a sequence without duplicates.

  Only the first occurrence of each element is kept, so element order
  is preserved. The elements must be hashable.

  """
  seen = set()
  unique = []
  for item in seq:
    if item in seen:
      continue
    seen.add(item)
    unique.append(item)
  return unique
def IsValidMac(mac):
  """Predicate to check if a MAC address is valid.

  Checks whether the supplied MAC address is formally correct: exactly
  six groups of two lowercase hexadecimal digits, separated by colons,
  with nothing before or after them.

  Returns:
    True if the address has this format, False otherwise

  """
  # One leading pair followed by exactly five ":xx" pairs. "\Z" anchors at
  # the very end of the string, so neither a trailing colon nor a trailing
  # newline is accepted.
  mac_check = re.compile(r"^[0-9a-f]{2}(?::[0-9a-f]{2}){5}\Z")
  return mac_check.match(mac) is not None
1084 def TestDelay(duration):
1085 """Sleep for a fixed amount of time.
1090 time.sleep(duration)