4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Ganeti small utilities
45 from cStringIO import StringIO
47 from ganeti import errors
48 from ganeti import constants
52 _re_shell_unquoted = re.compile('^[-.,=:/_+@A-Za-z0-9]+$')
59 class RunResult(object):
60 """Simple class for holding the result of running external programs.
63 exit_code: the exit code of the program, or None (if the program
65 signal: numeric signal that caused the program to finish, or None
66 (if the program wasn't terminated by a signal)
67 stdout: the standard output of the program
68 stderr: the standard error of the program
69 failed: a Boolean value which is True in case the program was
70 terminated by a signal or exited with a non-zero exit code
71 fail_reason: a string detailing the termination reason
74 __slots__ = ["exit_code", "signal", "stdout", "stderr",
75 "failed", "fail_reason", "cmd"]
78 def __init__(self, exit_code, signal_, stdout, stderr, cmd):
80 self.exit_code = exit_code
84 self.failed = (signal_ is not None or exit_code != 0)
86 if self.signal is not None:
87 self.fail_reason = "terminated by signal %s" % self.signal
88 elif self.exit_code is not None:
89 self.fail_reason = "exited with exit code %s" % self.exit_code
91 self.fail_reason = "unable to determine termination reason"
94 logging.debug("Command '%s' failed (%s); output: %s",
95 self.cmd, self.fail_reason, self.output)
98 """Returns the combined stdout and stderr for easier usage.
101 return self.stdout + self.stderr
103 output = property(_GetOutput, None, None, "Return full output")
106 def RunCmd(cmd, env=None, output=None):
107 """Execute a (shell) command.
109 The command should not read from its standard input, as it will be
112 @type cmd: string or list
113 @param cmd: Command to run
115 @keyword env: Additional environment
117 @keyword output: if desired, the output of the command can be
118 saved in a file instead of the RunResult instance; this
119 parameter denotes the file name (if not None)
121 @return: `RunResult` instance
125 raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled")
127 if isinstance(cmd, list):
128 cmd = [str(val) for val in cmd]
129 strcmd = " ".join(cmd)
134 logging.debug("RunCmd '%s'", strcmd)
136 cmd_env = os.environ.copy()
137 cmd_env["LC_ALL"] = "C"
142 out, err, status = _RunCmdPipe(cmd, cmd_env, shell)
144 status = _RunCmdFile(cmd, cmd_env, shell, output)
154 return RunResult(exitcode, signal_, out, err, strcmd)
156 def _RunCmdPipe(cmd, env, via_shell):
157 """Run a command and return its output.
159 @type cmd: string or list
160 @param cmd: Command to run
162 @param env: The environment to use
163 @type via_shell: bool
164 @param via_shell: if we should run via the shell
166 @return: (out, err, status)
169 poller = select.poll()
170 child = subprocess.Popen(cmd, shell=via_shell,
171 stderr=subprocess.PIPE,
172 stdout=subprocess.PIPE,
173 stdin=subprocess.PIPE,
174 close_fds=True, env=env)
177 poller.register(child.stdout, select.POLLIN)
178 poller.register(child.stderr, select.POLLIN)
182 child.stdout.fileno(): (out, child.stdout),
183 child.stderr.fileno(): (err, child.stderr),
186 status = fcntl.fcntl(fd, fcntl.F_GETFL)
187 fcntl.fcntl(fd, fcntl.F_SETFL, status | os.O_NONBLOCK)
190 for fd, event in poller.poll():
191 if event & select.POLLIN or event & select.POLLPRI:
192 data = fdmap[fd][1].read()
193 # no data from read signifies EOF (the same as POLLHUP)
195 poller.unregister(fd)
198 fdmap[fd][0].write(data)
199 if (event & select.POLLNVAL or event & select.POLLHUP or
200 event & select.POLLERR):
201 poller.unregister(fd)
207 status = child.wait()
208 return out, err, status
211 def _RunCmdFile(cmd, env, via_shell, output):
212 """Run a command and save its output to a file.
214 @type cmd: string or list
215 @param cmd: Command to run
217 @param env: The environment to use
218 @type via_shell: bool
219 @param via_shell: if we should run via the shell
221 @param output: the filename in which to save the output
223 @return: the exit status
226 fh = open(output, "a")
228 child = subprocess.Popen(cmd, shell=via_shell,
229 stderr=subprocess.STDOUT,
231 stdin=subprocess.PIPE,
232 close_fds=True, env=env)
235 status = child.wait()
241 def RemoveFile(filename):
242 """Remove a file ignoring some errors.
244 Remove a file, ignoring non-existing ones or directories. Other
251 if err.errno not in (errno.ENOENT, errno.EISDIR):
255 def _FingerprintFile(filename):
256 """Compute the fingerprint of a file.
258 If the file does not exist, a None will be returned
262 filename - Filename (str)
265 if not (os.path.exists(filename) and os.path.isfile(filename)):
278 return fp.hexdigest()
281 def FingerprintFiles(files):
282 """Compute fingerprints for a list of files.
285 files - array of filenames. ( [str, ...] )
288 dictionary of filename: fingerprint for the files that exist
293 for filename in files:
294 cksum = _FingerprintFile(filename)
296 ret[filename] = cksum
301 def CheckDict(target, template, logname=None):
302 """Ensure a dictionary has a required set of keys.
304 For the given dictionaries `target` and `template`, ensure target
305 has all the keys from template. Missing keys are added with values
309 target - the dictionary to check
310 template - template dictionary
311 logname - a caller-chosen string to identify the debug log
312 entry; if None, no logging will be done
322 target[k] = template[k]
324 if missing and logname:
325 logging.warning('%s missing keys %s', logname, ', '.join(missing))
328 def IsProcessAlive(pid):
329 """Check if a given pid exists on the system.
331 Returns: true or false, depending on if the pid exists or not
333 Remarks: zombie processes treated as not alive, and giving a pid <=
334 0 makes the function to return False.
341 f = open("/proc/%d/status" % pid)
343 if err.errno in (errno.ENOENT, errno.ENOTDIR):
350 state = data[1].split()
351 if len(state) > 1 and state[1] == "Z":
359 def ReadPidFile(pidfile):
360 """Read the pid from a file.
362 @param pidfile: Path to a file containing the pid to be checked
363 @type pidfile: string (filename)
364 @return: The process id, if the file exista and contains a valid PID,
370 pf = open(pidfile, 'r')
371 except EnvironmentError, err:
372 if err.errno != errno.ENOENT:
373 logging.exception("Can't read pid file?!")
378 except ValueError, err:
379 logging.info("Can't parse pid file contents", exc_info=True)
385 def MatchNameComponent(key, name_list):
386 """Try to match a name against a list.
388 This function will try to match a name like test1 against a list
389 like ['test1.example.com', 'test2.example.com', ...]. Against this
390 list, 'test1' as well as 'test1.example' will match, but not
391 'test1.ex'. A multiple match will be considered as no match at all
392 (e.g. 'test1' against ['test1.example.com', 'test1.example.org']).
395 key: the name to be searched
396 name_list: the list of strings against which to search the key
399 None if there is no match *or* if there are multiple matches
400 otherwise the element from the list which matches
403 mo = re.compile("^%s(\..*)?$" % re.escape(key))
404 names_filtered = [name for name in name_list if mo.match(name) is not None]
405 if len(names_filtered) != 1:
407 return names_filtered[0]
411 """Class implementing resolver and hostname functionality
414 def __init__(self, name=None):
415 """Initialize the host name object.
417 If the name argument is not passed, it will use this system's
422 name = self.SysName()
425 self.name, self.aliases, self.ipaddrs = self.LookupHostname(name)
426 self.ip = self.ipaddrs[0]
429 """Returns the hostname without domain.
432 return self.name.split('.')[0]
436 """Return the current system's name.
438 This is simply a wrapper over socket.gethostname()
441 return socket.gethostname()
444 def LookupHostname(hostname):
448 hostname: hostname to look up
451 a tuple (name, aliases, ipaddrs) as returned by socket.gethostbyname_ex
452 in case of errors in resolving, we raise a ResolverError
456 result = socket.gethostbyname_ex(hostname)
457 except socket.gaierror, err:
458 # hostname not found in DNS
459 raise errors.ResolverError(hostname, err.args[0], err.args[1])
464 def ListVolumeGroups():
465 """List volume groups and their size
468 Dictionary with keys volume name and values the size of the volume
471 command = "vgs --noheadings --units m --nosuffix -o name,size"
472 result = RunCmd(command)
477 for line in result.stdout.splitlines():
479 name, size = line.split()
480 size = int(float(size))
481 except (IndexError, ValueError), err:
482 logging.error("Invalid output from vgs (%s): %s", err, line)
def BridgeExists(bridge):
  """Check whether the given bridge exists in the system.

  The test is whether a C{bridge} directory is present under the
  interface's entry in C{/sys/class/net}.

  @type bridge: string
  @param bridge: name of the interface to check
  @rtype: bool
  @return: True if the directory exists, False otherwise

  """
  sysfs_path = "/sys/class/net/%s/bridge" % bridge
  return os.path.isdir(sysfs_path)
500 def NiceSort(name_list):
501 """Sort a list of strings based on digit and non-digit groupings.
503 Given a list of names ['a1', 'a10', 'a11', 'a2'] this function will
504 sort the list in the logical order ['a1', 'a2', 'a10', 'a11'].
506 The sort algorithm breaks each name in groups of either only-digits
507 or no-digits. Only the first eight such groups are considered, and
508 after that we just use what's left of the string.
511 - a copy of the list sorted according to our algorithm
514 _SORTER_BASE = "(\D+|\d+)"
515 _SORTER_FULL = "^%s%s?%s?%s?%s?%s?%s?%s?.*$" % (_SORTER_BASE, _SORTER_BASE,
516 _SORTER_BASE, _SORTER_BASE,
517 _SORTER_BASE, _SORTER_BASE,
518 _SORTER_BASE, _SORTER_BASE)
519 _SORTER_RE = re.compile(_SORTER_FULL)
520 _SORTER_NODIGIT = re.compile("^\D*$")
522 """Attempts to convert a variable to integer."""
523 if val is None or _SORTER_NODIGIT.match(val):
528 to_sort = [([_TryInt(grp) for grp in _SORTER_RE.match(name).groups()], name)
529 for name in name_list]
531 return [tup[1] for tup in to_sort]
534 def TryConvert(fn, val):
535 """Try to convert a value ignoring errors.
537 This function tries to apply function `fn` to `val`. If no
538 ValueError or TypeError exceptions are raised, it will return the
539 result, else it will return the original value. Any other exceptions
540 are propagated to the caller.
545 except (ValueError, TypeError), err:
551 """Verifies the syntax of an IP address.
553 This function checks if the ip address passes is valid or not based
554 on syntax (not ip range, class calculations or anything).
557 unit = "(0|[1-9]\d{0,2})"
558 return re.match("^%s\.%s\.%s\.%s$" % (unit, unit, unit, unit), ip)
def IsValidShellParam(word):
  """Verifies if the given word is safe from the shell's p.o.v.

  A word is accepted only if it is non-empty and consists exclusively of
  letters, digits and the characters C{- . _ + / : % @}. Such a word can
  be placed on a shell command line without altering it. The check is
  deliberately over-restrictive, in order to be on the safe side.

  @type word: string
  @param word: the word to check
  @rtype: bool
  @return: True if the word only contains allowed characters

  """
  # "\Z" is used instead of "$" because "$" also matches just before a
  # trailing newline, which would let "foo\n" pass as a safe parameter.
  return bool(re.match(r"^[-a-zA-Z0-9._+/:%@]+\Z", word))
575 def BuildShellCmd(template, *args):
576 """Build a safe shell command line from the given arguments.
578 This function will check all arguments in the args list so that they
579 are valid shell parameters (i.e. they don't contain shell
580 metacharaters). If everything is ok, it will return the result of
585 if not IsValidShellParam(word):
586 raise errors.ProgrammerError("Shell argument '%s' contains"
587 " invalid characters" % word)
588 return template % args
591 def FormatUnit(value):
592 """Formats an incoming number of MiB with the appropriate unit.
594 Value needs to be passed as a numeric type. Return value is always a string.
598 return "%dM" % round(value, 0)
600 elif value < (1024 * 1024):
601 return "%0.1fG" % round(float(value) / 1024, 1)
604 return "%0.1fT" % round(float(value) / 1024 / 1024, 1)
607 def ParseUnit(input_string):
608 """Tries to extract number and scale from the given string.
610 Input must be in the format NUMBER+ [DOT NUMBER+] SPACE* [UNIT]. If no unit
611 is specified, it defaults to MiB. Return value is always an int in MiB.
614 m = re.match('^([.\d]+)\s*([a-zA-Z]+)?$', input_string)
616 raise errors.UnitParseError("Invalid format")
618 value = float(m.groups()[0])
622 lcunit = unit.lower()
626 if lcunit in ('m', 'mb', 'mib'):
627 # Value already in MiB
630 elif lcunit in ('g', 'gb', 'gib'):
633 elif lcunit in ('t', 'tb', 'tib'):
637 raise errors.UnitParseError("Unknown unit: %s" % unit)
639 # Make sure we round up
640 if int(value) < value:
643 # Round up to the next multiple of 4
646 value += 4 - value % 4
651 def AddAuthorizedKey(file_name, key):
652 """Adds an SSH public key to an authorized_keys file.
655 file_name: Path to authorized_keys file
656 key: String containing key
658 key_fields = key.split()
660 f = open(file_name, 'a+')
664 # Ignore whitespace changes
665 if line.split() == key_fields:
667 nl = line.endswith('\n')
671 f.write(key.rstrip('\r\n'))
678 def RemoveAuthorizedKey(file_name, key):
679 """Removes an SSH public key from an authorized_keys file.
682 file_name: Path to authorized_keys file
683 key: String containing key
685 key_fields = key.split()
687 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
689 out = os.fdopen(fd, 'w')
691 f = open(file_name, 'r')
694 # Ignore whitespace changes while comparing lines
695 if line.split() != key_fields:
699 os.rename(tmpname, file_name)
709 def SetEtcHostsEntry(file_name, ip, hostname, aliases):
710 """Sets the name of an IP address and hostname in /etc/hosts.
713 # Ensure aliases are unique
714 aliases = UniqueSequence([hostname] + aliases)[1:]
716 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
718 out = os.fdopen(fd, 'w')
720 f = open(file_name, 'r')
724 fields = line.split()
725 if fields and not fields[0].startswith('#') and ip == fields[0]:
729 out.write("%s\t%s" % (ip, hostname))
731 out.write(" %s" % ' '.join(aliases))
736 os.rename(tmpname, file_name)
def AddHostToEtcHosts(hostname):
  """Wrapper around SetEtcHostsEntry.

  The name is resolved through HostInfo; the resulting IP address and
  name are written to constants.ETC_HOSTS, with the short name (the
  part before the first dot) as the only alias.

  @param hostname: the name to resolve and add

  """
  host = HostInfo(name=hostname)
  short_aliases = [host.ShortName()]
  SetEtcHostsEntry(constants.ETC_HOSTS, host.ip, host.name, short_aliases)
754 def RemoveEtcHostsEntry(file_name, hostname):
755 """Removes a hostname from /etc/hosts.
757 IP addresses without names are removed from the file.
759 fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
761 out = os.fdopen(fd, 'w')
763 f = open(file_name, 'r')
766 fields = line.split()
767 if len(fields) > 1 and not fields[0].startswith('#'):
769 if hostname in names:
770 while hostname in names:
771 names.remove(hostname)
773 out.write("%s %s\n" % (fields[0], ' '.join(names)))
780 os.rename(tmpname, file_name)
def RemoveHostFromEtcHosts(hostname):
  """Wrapper around RemoveEtcHostsEntry.

  The name is resolved through HostInfo; first the resolved name and
  then its short form are removed from constants.ETC_HOSTS.

  @param hostname: the name to resolve and remove

  """
  host = HostInfo(name=hostname)
  for entry_name in (host.name, host.ShortName()):
    RemoveEtcHostsEntry(constants.ETC_HOSTS, entry_name)
799 def CreateBackup(file_name):
800 """Creates a backup of a file.
802 Returns: the path to the newly created backup file.
805 if not os.path.isfile(file_name):
806 raise errors.ProgrammerError("Can't make a backup of a non-file '%s'" %
809 prefix = '%s.backup-%d.' % (os.path.basename(file_name), int(time.time()))
810 dir_name = os.path.dirname(file_name)
812 fsrc = open(file_name, 'rb')
814 (fd, backup_name) = tempfile.mkstemp(prefix=prefix, dir=dir_name)
815 fdst = os.fdopen(fd, 'wb')
817 shutil.copyfileobj(fsrc, fdst)
826 def ShellQuote(value):
827 """Quotes shell argument according to POSIX.
830 if _re_shell_unquoted.match(value):
833 return "'%s'" % value.replace("'", "'\\''")
def ShellQuoteArgs(args):
  """Quotes all given shell arguments and concatenates using spaces.

  @param args: iterable of arguments, each one passed through ShellQuote
  @rtype: string
  @return: the quoted arguments joined by single spaces

  """
  quoted = []
  for arg in args:
    quoted.append(ShellQuote(arg))
  return ' '.join(quoted)
843 def TcpPing(target, port, timeout=10, live_port_needed=False, source=None):
844 """Simple ping implementation using TCP connect(2).
846 Try to do a TCP connect(2) from an optional source IP to the
847 specified target IP and the specified target port. If the optional
848 parameter live_port_needed is set to true, requires the remote end
849 to accept the connection. The timeout is specified in seconds and
850 defaults to 10 seconds. If the source optional argument is not
851 passed, the source address selection is left to the kernel,
852 otherwise we try to connect using the passed address (failures to
853 bind other than EADDRNOTAVAIL will be ignored).
856 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
860 if source is not None:
862 sock.bind((source, 0))
863 except socket.error, (errcode, errstring):
864 if errcode == errno.EADDRNOTAVAIL:
867 sock.settimeout(timeout)
870 sock.connect((target, port))
873 except socket.timeout:
875 except socket.error, (errcode, errstring):
876 success = (not live_port_needed) and (errcode == errno.ECONNREFUSED)
def OwnIpAddress(address):
  """Check if the current host has the given IP address.

  Currently this is done by tcp-pinging the address from the loopback
  address (constants.LOCALHOST_IP_ADDRESS) on the default node daemon
  port (constants.DEFAULT_NODED_PORT).

  @type address: string
  @param address: the address to check
  @return: the result of the TcpPing call

  """
  noded_port = constants.DEFAULT_NODED_PORT
  loopback = constants.LOCALHOST_IP_ADDRESS
  return TcpPing(address, noded_port, source=loopback)
896 def ListVisibleFiles(path):
897 """Returns a list of all visible files in a directory.
900 files = [i for i in os.listdir(path) if not i.startswith(".")]
905 def GetHomeDir(user, default=None):
906 """Try to get the homedir of the given user.
908 The user can be passed either as a string (denoting the name) or as
909 an integer (denoting the user id). If the user is not found, the
910 'default' argument is returned, which defaults to None.
914 if isinstance(user, basestring):
915 result = pwd.getpwnam(user)
916 elif isinstance(user, (int, long)):
917 result = pwd.getpwuid(user)
919 raise errors.ProgrammerError("Invalid type passed to GetHomeDir (%s)" %
927 """Returns a random UUID.
930 f = open("/proc/sys/kernel/random/uuid", "r")
932 return f.read(128).rstrip("\n")
def GenerateSecret():
  """Generates a random secret.

  This reads 64 bytes from os.urandom and returns their SHA-1 digest in
  hexadecimal form (so that it can be used where an ASCII string is
  needed).

  @rtype: string
  @return: a 40-character hexadecimal string

  """
  # The stand-alone "sha" module is deprecated since Python 2.5 and is
  # absent from Python 3; prefer hashlib and only fall back to "sha" on
  # interpreters that do not ship hashlib.
  try:
    from hashlib import sha1 as _sha1
  except ImportError:
    from sha import new as _sha1
  return _sha1(os.urandom(64)).hexdigest()
947 def ReadFile(file_name, size=None):
950 @type size: None or int
951 @param size: Read at most size bytes
954 f = open(file_name, "r")
964 def WriteFile(file_name, fn=None, data=None,
965 mode=None, uid=-1, gid=-1,
966 atime=None, mtime=None, close=True,
967 dry_run=False, backup=False,
968 prewrite=None, postwrite=None):
969 """(Over)write a file atomically.
971 The file_name and either fn (a function taking one argument, the
972 file descriptor, and which should write the data to it) or data (the
973 contents of the file) must be passed. The other arguments are
974 optional and allow setting the file mode, owner and group, and the
975 mtime/atime of the file.
977 If the function doesn't raise an exception, it has succeeded and the
978 target file has the new contents. If the file has raised an
979 exception, an existing target file should be unmodified and the
980 temporary file should be removed.
983 file_name: New filename
984 fn: Content writing function, called with file descriptor as parameter
985 data: Content as string
990 mtime: Modification time
991 close: Whether to close file after writing it
992 prewrite: Function object called before writing content
993 postwrite: Function object called after writing content
996 None if "close" parameter evaluates to True, otherwise file descriptor.
999 if not os.path.isabs(file_name):
1000 raise errors.ProgrammerError("Path passed to WriteFile is not"
1001 " absolute: '%s'" % file_name)
1003 if [fn, data].count(None) != 1:
1004 raise errors.ProgrammerError("fn or data required")
1006 if [atime, mtime].count(None) == 1:
1007 raise errors.ProgrammerError("Both atime and mtime must be either"
1010 if backup and not dry_run and os.path.isfile(file_name):
1011 CreateBackup(file_name)
1013 dir_name, base_name = os.path.split(file_name)
1014 fd, new_name = tempfile.mkstemp('.new', base_name, dir_name)
1015 # here we need to make sure we remove the temp file, if any error
1016 # leaves it in place
1018 if uid != -1 or gid != -1:
1019 os.chown(new_name, uid, gid)
1021 os.chmod(new_name, mode)
1022 if callable(prewrite):
1024 if data is not None:
1028 if callable(postwrite):
1031 if atime is not None and mtime is not None:
1032 os.utime(new_name, (atime, mtime))
1034 os.rename(new_name, file_name)
1041 RemoveFile(new_name)
1046 def FirstFree(seq, base=0):
1047 """Returns the first non-existing integer from seq.
1049 The seq argument should be a sorted list of positive integers. The
1050 first time the index of an element is smaller than the element
1051 value, the index will be returned.
1053 The base argument is used to start at a different offset,
1054 i.e. [3, 4, 6] with offset=3 will return 5.
1056 Example: [0, 1, 3] will return 2.
1059 for idx, elem in enumerate(seq):
1060 assert elem >= base, "Passed element is higher than base offset"
1061 if elem > idx + base:
1067 def all(seq, pred=bool):
1068 "Returns True if pred(x) is True for every element in the iterable"
1069 for elem in itertools.ifilterfalse(pred, seq):
1074 def any(seq, pred=bool):
1075 "Returns True if pred(x) is True for at least one element in the iterable"
1076 for elem in itertools.ifilter(pred, seq):
1081 def UniqueSequence(seq):
1082 """Returns a list with unique elements.
1084 Element order is preserved.
1087 return [i for i in seq if i not in seen and not seen.add(i)]
def IsValidMac(mac):
  """Predicate to check if a MAC address is valid.

  Checks whether the supplied MAC address is formally correct. Only the
  colon separated format with lower-case hexadecimal digits is accepted,
  i.e. exactly six two-digit groups joined by single colons.

  @type mac: string
  @param mac: the MAC address to check
  @rtype: bool
  @return: True if the address is well-formed, False otherwise

  """
  # The separator is only allowed *between* groups and "\Z" anchors at the
  # real end of the string, so neither a trailing colon nor a trailing
  # newline is accepted.
  mac_check = re.compile(r"^[0-9a-f]{2}(:[0-9a-f]{2}){5}\Z")
  return mac_check.match(mac) is not None
1100 def TestDelay(duration):
1101 """Sleep for a fixed amount of time.
1106 time.sleep(duration)
1110 def Daemonize(logfile, noclose_fds=None):
1111 """Daemonize the current process.
1113 This detaches the current process from the controlling terminal and
1114 runs it in the background as a daemon.
1119 # Default maximum for the number of available file descriptors.
1120 if 'SC_OPEN_MAX' in os.sysconf_names:
1122 MAXFD = os.sysconf('SC_OPEN_MAX')
1132 if (pid == 0): # The first child.
1135 pid = os.fork() # Fork a second child.
1136 if (pid == 0): # The second child.
1140 # exit() or _exit()? See below.
1141 os._exit(0) # Exit parent (the first child) of the second child.
1143 os._exit(0) # Exit parent of the first child.
1144 maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
1145 if (maxfd == resource.RLIM_INFINITY):
1148 # Iterate through and close all file descriptors.
1149 for fd in range(0, maxfd):
1150 if noclose_fds and fd in noclose_fds:
1154 except OSError: # ERROR, fd wasn't open to begin with (ignored)
1156 os.open(logfile, os.O_RDWR|os.O_CREAT|os.O_APPEND, 0600)
1157 # Duplicate standard input to standard output and standard error.
1158 os.dup2(0, 1) # standard output (1)
1159 os.dup2(0, 2) # standard error (2)
def DaemonPidFileName(name):
  """Compute the ganeti pid file path for the given daemon name.

  @param name: the daemon name
  @return: the path C{<name>.pid} inside constants.RUN_GANETI_DIR

  """
  pid_basename = "%s.pid" % name
  return os.path.join(constants.RUN_GANETI_DIR, pid_basename)
1170 def WritePidFile(name):
1171 """Write the current process pidfile.
1173 The file will be written to constants.RUN_GANETI_DIR/name.pid
1177 pidfilename = DaemonPidFileName(name)
1178 if IsProcessAlive(ReadPidFile(pidfilename)):
1179 raise errors.GenericError("%s contains a live process" % pidfilename)
1181 WriteFile(pidfilename, data="%d\n" % pid)
1184 def RemovePidFile(name):
1185 """Remove the current process pidfile.
1187 Any errors are ignored.
1191 pidfilename = DaemonPidFileName(name)
1192 # TODO: we could check here that the file contains our pid
1194 RemoveFile(pidfilename)
1199 def KillProcess(pid, signal_=signal.SIGTERM, timeout=30):
1200 """Kill a process given by its pid.
1203 @param pid: The PID to terminate.
1205 @param signal_: The signal to send, by default SIGTERM
1207 @param timeout: The timeout after which, if the process is still alive,
1208 a SIGKILL will be sent. If not positive, no such checking
1213 # kill with pid=0 == suicide
1214 raise errors.ProgrammerError("Invalid pid given '%s'" % pid)
1216 if not IsProcessAlive(pid):
1218 os.kill(pid, signal_)
1221 end = time.time() + timeout
1222 while time.time() < end and IsProcessAlive(pid):
1224 if IsProcessAlive(pid):
1225 os.kill(pid, signal.SIGKILL)
1228 def FindFile(name, search_path, test=os.path.exists):
1229 """Look for a filesystem object in a given path.
1231 This is an abstract method to search for filesystem object (files,
1232 dirs) under a given search path.
1235 - name: the name to look for
1236 - search_path: list of directory names
1237 - test: the test which the full path must satisfy
1238 (defaults to os.path.exists)
1241 - full path to the item if found
1245 for dir_name in search_path:
1246 item_name = os.path.sep.join([dir_name, name])
1252 def CheckVolumeGroupSize(vglist, vgname, minsize):
1253 """Checks if the volume group list is valid.
1255 A non-None return value means there's an error, and the return value
1256 is the error message.
1259 vgsize = vglist.get(vgname, None)
1261 return "volume group '%s' missing" % vgname
1262 elif vgsize < minsize:
1263 return ("volume group '%s' too small (%s MiB required, %d MiB found)" %
1264 (vgname, minsize, vgsize))
def SplitTime(value):
  """Splits time as floating point number into a tuple.

  @param value: Time in seconds
  @type value: int or float
  @rtype: tuple
  @return: Tuple containing (seconds, microseconds)

  """
  # Round to the nearest microsecond instead of truncating: a value such as
  # 1.000001 is stored slightly below its decimal value, so after the
  # multiplication a plain int() would silently drop the last microsecond
  # and SplitTime(MergeTime(t)) would not give back t.
  total_usecs = int(round(value * 1000000))
  (seconds, microseconds) = divmod(total_usecs, 1000000)

  assert 0 <= seconds, \
    "Seconds must be larger than or equal to 0, but are %s" % seconds
  assert 0 <= microseconds <= 999999, \
    "Microseconds must be 0-999999, but are %s" % microseconds

  return (int(seconds), int(microseconds))
def MergeTime(timetuple):
  """Merges a tuple into time as a floating point number.

  @param timetuple: Time as tuple, (seconds, microseconds)
  @type timetuple: tuple
  @rtype: float
  @return: Time as a floating point number expressed in seconds

  """
  seconds, microseconds = timetuple

  # Sanity checks on both components before combining them
  assert 0 <= seconds, \
    "Seconds must be larger than or equal to 0, but are %s" % seconds
  assert 0 <= microseconds <= 999999, \
    "Microseconds must be 0-999999, but are %s" % microseconds

  whole_part = float(seconds)
  fractional_part = float(microseconds) * 0.000001
  return whole_part + fractional_part
1304 def GetNodeDaemonPort():
1305 """Get the node daemon port for this cluster.
1307 Note that this routine does not read a ganeti-specific file, but
1308 instead uses socket.getservbyname to allow pre-customization of
1309 this parameter outside of Ganeti.
1313 port = socket.getservbyname("ganeti-noded", "tcp")
1314 except socket.error:
1315 port = constants.DEFAULT_NODED_PORT
def GetNodeDaemonPassword():
  """Get the node password for the cluster.

  @return: the contents of constants.CLUSTER_PASSWORD_FILE, as returned
      by ReadFile

  """
  password_file = constants.CLUSTER_PASSWORD_FILE
  return ReadFile(password_file)
1327 def SetupLogging(logfile, debug=False, stderr_logging=False, program=""):
1328 """Configures the logging module.
1331 fmt = "%(asctime)s: " + program + " "
1333 fmt += ("pid=%(process)d/%(threadName)s %(levelname)s"
1334 " %(module)s:%(lineno)s %(message)s")
1336 fmt += "pid=%(process)d %(levelname)s %(message)s"
1337 formatter = logging.Formatter(fmt)
1339 root_logger = logging.getLogger("")
1340 root_logger.setLevel(logging.NOTSET)
1343 stderr_handler = logging.StreamHandler()
1344 stderr_handler.setFormatter(formatter)
1346 stderr_handler.setLevel(logging.NOTSET)
1348 stderr_handler.setLevel(logging.CRITICAL)
1349 root_logger.addHandler(stderr_handler)
1351 # this can fail, if the logging directories are not setup or we have
1352 # a permisssion problem; in this case, it's best to log but ignore
1353 # the error if stderr_logging is True, and if false we re-raise the
1354 # exception since otherwise we could run but without any logs at all
1356 logfile_handler = logging.FileHandler(logfile)
1357 logfile_handler.setFormatter(formatter)
1359 logfile_handler.setLevel(logging.DEBUG)
1361 logfile_handler.setLevel(logging.INFO)
1362 root_logger.addHandler(logfile_handler)
1363 except EnvironmentError, err:
1365 logging.exception("Failed to enable logging to file '%s'", logfile)
1367 # we need to re-raise the exception
1371 def LockedMethod(fn):
1372 """Synchronized object access decorator.
1374 This decorator is intended to protect access to an object using the
1375 object's own lock which is hardcoded to '_lock'.
1378 def _LockDebug(*args, **kwargs):
1380 logging.debug(*args, **kwargs)
1382 def wrapper(self, *args, **kwargs):
1383 assert hasattr(self, '_lock')
1385 _LockDebug("Waiting for %s", lock)
1388 _LockDebug("Acquired %s", lock)
1389 result = fn(self, *args, **kwargs)
1391 _LockDebug("Releasing %s", lock)
1393 _LockDebug("Released %s", lock)
1399 """Locks a file using POSIX locks.
1403 fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
1404 except IOError, err:
1405 if err.errno == errno.EAGAIN:
1406 raise errors.LockError("File already locked")
1410 class FileLock(object):
1411 """Utility class for file locks.
1414 def __init__(self, filename):
1415 self.filename = filename
1416 self.fd = open(self.filename, "w")
1426 def _flock(self, flag, blocking, timeout, errmsg):
1427 """Wrapper for fcntl.flock.
1430 @param flag: Operation flag
1431 @type blocking: bool
1432 @param blocking: Whether the operation should be done in blocking mode.
1433 @type timeout: None or float
1434 @param timeout: For how long the operation should be retried (implies
1436 @type errmsg: string
1437 @param errmsg: Error message in case operation fails.
1440 assert self.fd, "Lock was closed"
1441 assert timeout is None or timeout >= 0, \
1442 "If specified, timeout must be positive"
1444 if timeout is not None:
1445 flag |= fcntl.LOCK_NB
1446 timeout_end = time.time() + timeout
1448 # Blocking doesn't have effect with timeout
1450 flag |= fcntl.LOCK_NB
1456 fcntl.flock(self.fd, flag)
1458 except IOError, err:
1459 if err.errno in (errno.EAGAIN, ):
1460 if timeout_end is not None and time.time() < timeout_end:
1461 # Wait before trying again
1462 time.sleep(max(0.1, min(1.0, timeout)))
1464 raise errors.LockError(errmsg)
1466 logging.exception("fcntl.flock failed")
1469 def Exclusive(self, blocking=False, timeout=None):
1470 """Locks the file in exclusive mode.
1473 self._flock(fcntl.LOCK_EX, blocking, timeout,
1474 "Failed to lock %s in exclusive mode" % self.filename)
1476 def Shared(self, blocking=False, timeout=None):
1477 """Locks the file in shared mode.
1480 self._flock(fcntl.LOCK_SH, blocking, timeout,
1481 "Failed to lock %s in shared mode" % self.filename)
1483 def Unlock(self, blocking=True, timeout=None):
1484 """Unlocks the file.
1486 According to "man flock", unlocking can also be a nonblocking operation:
1487 "To make a non-blocking request, include LOCK_NB with any of the above
1491 self._flock(fcntl.LOCK_UN, blocking, timeout,
1492 "Failed to unlock %s" % self.filename)
1495 class SignalHandler(object):
1496 """Generic signal handler class.
1498 It automatically restores the original handler when deconstructed or when
1499 Reset() is called. You can either pass your own handler function in or query
1500 the "called" attribute to detect whether the signal was sent.
1503 def __init__(self, signum):
1504 """Constructs a new SignalHandler instance.
1506 @param signum: Single signal number or set of signal numbers
1509 if isinstance(signum, (int, long)):
1510 self.signum = set([signum])
1512 self.signum = set(signum)
1518 for signum in self.signum:
1520 prev_handler = signal.signal(signum, self._HandleSignal)
1522 self._previous[signum] = prev_handler
1524 # Restore previous handler
1525 signal.signal(signum, prev_handler)
1528 # Reset all handlers
1530 # Here we have a race condition: a handler may have already been called,
1531 # but there's not much we can do about it at this point.
1538 """Restore previous handler.
1541 for signum, prev_handler in self._previous.items():
1542 signal.signal(signum, prev_handler)
1543 # If successful, remove from dict
1544 del self._previous[signum]
1547 """Unsets "called" flag.
1549 This function can be used in case a signal may arrive several times.
1554 def _HandleSignal(self, signum, frame):
1555 """Actual signal handling function.
1558 # This is not nice and not absolutely atomic, but it appears to be the only
1559 # solution in Python -- there are no atomic types.