#: MAC checker regexp
_MAC_CHECK = re.compile("^([0-9a-f]{2}:){5}[0-9a-f]{2}$", re.I)
+#: Timeout actions reported for a child process: no timeout was hit, the
+#: child was sent SIGTERM after the timeout, or it was sent SIGKILL after
+#: the additional linger timeout
+(_TIMEOUT_NONE,
+ _TIMEOUT_TERM,
+ _TIMEOUT_KILL) = range(3)
+
+#: Shell param checker regexp
+_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$")
+
+#: Unit checker regexp
+_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")
+
+#: ASN1 time regexp
+_ASN1_TIME_REGEX = re.compile(r"^(\d+)([-+]\d\d)(\d\d)$")
+
+#: NiceSort splitter: up to eight optional leading groups, each either all
+#: digits or all non-digits, followed by one group with the rest of the string
+_SORTER_RE = re.compile("^%s(.*)$" % (8 * "(\D+|\d+)?"))
+#: Matches strings consisting only of digits
+_SORTER_DIGIT = re.compile("^\d+$")
+
class RunResult(object):
"""Holds the result of running external programs.
"failed", "fail_reason", "cmd"]
- def __init__(self, exit_code, signal_, stdout, stderr, cmd):
+ def __init__(self, exit_code, signal_, stdout, stderr, cmd, timeout_action,
+ timeout):
self.cmd = cmd
self.exit_code = exit_code
self.signal = signal_
self.stderr = stderr
self.failed = (signal_ is not None or exit_code != 0)
+ fail_msgs = []
if self.signal is not None:
- self.fail_reason = "terminated by signal %s" % self.signal
+ fail_msgs.append("terminated by signal %s" % self.signal)
elif self.exit_code is not None:
- self.fail_reason = "exited with exit code %s" % self.exit_code
+ fail_msgs.append("exited with exit code %s" % self.exit_code)
else:
- self.fail_reason = "unable to determine termination reason"
+ fail_msgs.append("unable to determine termination reason")
+
+ if timeout_action == _TIMEOUT_TERM:
+ fail_msgs.append("terminated after timeout of %.2f seconds" % timeout)
+ elif timeout_action == _TIMEOUT_KILL:
+ fail_msgs.append(("force termination after timeout of %.2f seconds"
+ " and linger for another %.2f seconds") %
+ (timeout, constants.CHILD_LINGER_TIMEOUT))
+
+ if fail_msgs and self.failed:
+ self.fail_reason = CommaJoin(fail_msgs)
if self.failed:
logging.debug("Command '%s' failed (%s); output: %s",
def RunCmd(cmd, env=None, output=None, cwd="/", reset_env=False,
- interactive=False):
+ interactive=False, timeout=None):
"""Execute a (shell) command.
The command should not read from its standard input, as it will be
@type interactive: boolean
- @param interactive: weather we pipe stdin, stdout and stderr
+ @param interactive: whether we pipe stdin, stdout and stderr
(default behaviour) or run the command interactive
+ @type timeout: int
+ @param timeout: If not None, timeout in seconds until child process gets
+ killed
@rtype: L{RunResult}
@return: RunResult instance
@raise errors.ProgrammerError: if we call this when forks are disabled
try:
if output is None:
- out, err, status = _RunCmdPipe(cmd, cmd_env, shell, cwd, interactive)
+ out, err, status, timeout_action = _RunCmdPipe(cmd, cmd_env, shell, cwd,
+ interactive, timeout)
else:
+ timeout_action = _TIMEOUT_NONE
status = _RunCmdFile(cmd, cmd_env, shell, output, cwd)
out = err = ""
except OSError, err:
exitcode = None
signal_ = -status
- return RunResult(exitcode, signal_, out, err, strcmd)
+ return RunResult(exitcode, signal_, out, err, strcmd, timeout_action, timeout)
def SetupDaemonEnv(cwd="/", umask=077):
os.setsid()
+def SetupDaemonFDs(output_file, output_fd):
+ """Sets up a daemon's standard file descriptors.
+
+ Standard input is redirected to a read-only /dev/null. Standard output
+ and standard error are both redirected to C{output_fd} if given, else to
+ C{output_file} (opened for appending, created with mode 0600 if missing),
+ else to /dev/null.
+
+ @param output_file: if not None, the file to which to redirect
+ stdout/stderr
+ @param output_fd: if not None, the file descriptor for stdout/stderr
+ @raise Exception: if opening C{output_file} fails
+
+ """
+ # check that at most one is defined
+ assert [output_file, output_fd].count(None) >= 1
+
+ # Open /dev/null (read-only, only for stdin)
+ devnull_fd = os.open(os.devnull, os.O_RDONLY)
+
+ if output_fd is not None:
+ # Caller supplied an already open descriptor, nothing to open
+ pass
+ elif output_file is not None:
+ # Open output file
+ try:
+ output_fd = os.open(output_file,
+ os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600)
+ except EnvironmentError, err:
+ raise Exception("Opening output file failed: %s" % err)
+ else:
+ # Neither a file nor a descriptor was given: discard all output
+ output_fd = os.open(os.devnull, os.O_WRONLY)
+
+ # Redirect standard I/O
+ os.dup2(devnull_fd, 0)
+ os.dup2(output_fd, 1)
+ os.dup2(output_fd, 2)
+ # NOTE(review): devnull_fd and any descriptor opened above are not closed
+ # here after being duplicated - confirm whether that is intended
+
+
def StartDaemon(cmd, env=None, cwd="/", output=None, output_fd=None,
pidfile=None):
"""Start a daemon process after forking twice.
finally:
_CloseFDNoErr(errpipe_write)
- # Wait for daemon to be started (or an error message to arrive) and read
- # up to 100 KB as an error message
+ # Wait for daemon to be started (or an error message to
+ # arrive) and read up to 100 KB as an error message
errormsg = RetryOnSignal(os.read, errpipe_read, 100 * 1024)
finally:
_CloseFDNoErr(errpipe_read)
# Open PID file
if pidfile:
- try:
- # TODO: Atomic replace with another locked file instead of writing into
- # it after creating
- fd_pidfile = os.open(pidfile, os.O_WRONLY | os.O_CREAT, 0600)
-
- # Lock the PID file (and fail if not possible to do so). Any code
- # wanting to send a signal to the daemon should try to lock the PID
- # file before reading it. If acquiring the lock succeeds, the daemon is
- # no longer running and the signal should not be sent.
- LockFile(fd_pidfile)
-
- os.write(fd_pidfile, "%d\n" % os.getpid())
- except Exception, err:
- raise Exception("Creating and locking PID file failed: %s" % err)
+ fd_pidfile = WritePidFile(pidfile)
# Keeping the file open to hold the lock
noclose_fds.append(fd_pidfile)
else:
fd_pidfile = None
- # Open /dev/null
- fd_devnull = os.open(os.devnull, os.O_RDWR)
-
- assert not output or (bool(output) ^ (fd_output is not None))
-
- if fd_output is not None:
- pass
- elif output:
- # Open output file
- try:
- # TODO: Implement flag to set append=yes/no
- fd_output = os.open(output, os.O_WRONLY | os.O_CREAT, 0600)
- except EnvironmentError, err:
- raise Exception("Opening output file failed: %s" % err)
- else:
- fd_output = fd_devnull
-
- # Redirect standard I/O
- os.dup2(fd_devnull, 0)
- os.dup2(fd_output, 1)
- os.dup2(fd_output, 2)
+ SetupDaemonFDs(output, fd_output)
# Send daemon PID to parent
RetryOnSignal(os.write, pidpipe_write, str(os.getpid()))
except: # pylint: disable-msg=W0702
try:
# Report errors to original process
- buf = str(sys.exc_info()[1])
-
- RetryOnSignal(os.write, errpipe_write, buf)
+ WriteErrorToFD(errpipe_write, str(sys.exc_info()[1]))
except: # pylint: disable-msg=W0702
# Ignore errors in error handling
pass
os._exit(1) # pylint: disable-msg=W0212
-def _RunCmdPipe(cmd, env, via_shell, cwd, interactive):
+def WriteErrorToFD(fd, err):
+ """Possibly write an error message to a fd.
+
+ Does nothing if C{fd} is None. An empty or None message is replaced by
+ the placeholder C{"<unknown error>"} before writing.
+
+ @type fd: None or int (file descriptor)
+ @param fd: if not None, the error will be written to this fd
+ @type err: string
+ @param err: the error message
+
+ """
+ if fd is None:
+ return
+
+ # Never write an empty message; presumably so that a reader which treats
+ # "nothing read" as success can still detect the failure
+ if not err:
+ err = "<unknown error>"
+
+ RetryOnSignal(os.write, fd, err)
+
+
+def _CheckIfAlive(child):
+ """Raises L{RetryAgain} if child is still alive.
+
+ @param child: object whose C{poll()} returns None while the process is
+ still running
+ @raise RetryAgain: If child is still alive
+
+ """
+ # poll() returning None means no exit status is available yet
+ if child.poll() is None:
+ raise RetryAgain()
+
+
+def _WaitForProcess(child, timeout):
+ """Waits for the child to terminate or until we reach timeout.
+
+ Running into the timeout is not reported; callers have to check the
+ child's state themselves afterwards.
+
+ @param child: child process object, checked via L{_CheckIfAlive}
+ @param timeout: maximum time to wait; negative values are treated as zero
+
+ """
+ try:
+ # NOTE(review): the tuple is the delay specification handed to Retry,
+ # presumably (start, factor, limit) - confirm against Retry
+ Retry(_CheckIfAlive, (1.0, 1.2, 5.0), max(0, timeout), args=[child])
+ except RetryTimeout:
+ # Child still alive after the timeout; deliberately ignored here
+ pass
+
+
+def _RunCmdPipe(cmd, env, via_shell, cwd, interactive, timeout,
+ _linger_timeout=constants.CHILD_LINGER_TIMEOUT):
"""Run a command and return its output.
@type cmd: string or list
@param cwd: the working directory for the program
@type interactive: boolean
@param interactive: Run command interactive (without piping)
+ @type timeout: int
+ @param timeout: Timeout in seconds after which the program gets terminated
@rtype: tuple
- @return: (out, err, status)
+ @return: (out, err, status, timeout_action)
out = StringIO()
err = StringIO()
+
+ linger_timeout = None
+
+ if timeout is None:
+ poll_timeout = None
+ else:
+ poll_timeout = RunningTimeout(timeout, True).Remaining
+
+ msg_timeout = ("Command %s (%d) run into execution timeout, terminating" %
+ (cmd, child.pid))
+ msg_linger = ("Command %s (%d) run into linger timeout, killing" %
+ (cmd, child.pid))
+
+ timeout_action = _TIMEOUT_NONE
+
if not interactive:
child.stdin.close()
poller.register(child.stdout, select.POLLIN)
SetNonblockFlag(fd, True)
while fdmap:
- pollresult = RetryOnSignal(poller.poll)
+ if poll_timeout:
+ pt = poll_timeout() * 1000
+ if pt < 0:
+ if linger_timeout is None:
+ logging.warning(msg_timeout)
+ if child.poll() is None:
+ timeout_action = _TIMEOUT_TERM
+ IgnoreProcessNotFound(os.kill, child.pid, signal.SIGTERM)
+ linger_timeout = RunningTimeout(_linger_timeout, True).Remaining
+ pt = linger_timeout() * 1000
+ if pt < 0:
+ break
+ else:
+ pt = None
+
+ pollresult = RetryOnSignal(poller.poll, pt)
for fd, event in pollresult:
if event & select.POLLIN or event & select.POLLPRI:
poller.unregister(fd)
del fdmap[fd]
+ if timeout is not None:
+ assert callable(poll_timeout)
+
+ # We have no I/O left but it might still run
+ if child.poll() is None:
+ _WaitForProcess(child, poll_timeout())
+
+ # Terminate if still alive after timeout
+ if child.poll() is None:
+ if linger_timeout is None:
+ logging.warning(msg_timeout)
+ timeout_action = _TIMEOUT_TERM
+ IgnoreProcessNotFound(os.kill, child.pid, signal.SIGTERM)
+ lt = _linger_timeout
+ else:
+ lt = linger_timeout()
+ _WaitForProcess(child, lt)
+
+ # Okay, still alive after timeout and linger timeout? Kill it!
+ if child.poll() is None:
+ timeout_action = _TIMEOUT_KILL
+ logging.warning(msg_linger)
+ IgnoreProcessNotFound(os.kill, child.pid, signal.SIGKILL)
+
out = out.getvalue()
err = err.getvalue()
status = child.wait()
- return out, err, status
+ return out, err, status, timeout_action
def _RunCmdFile(cmd, env, via_shell, output, cwd):
return os.path.isdir("/sys/class/net/%s/bridge" % bridge)
-def NiceSort(name_list):
+def _NiceSortTryInt(val):
+ """Attempts to convert a string to an integer.
+
+ @param val: string or None
+ @return: C{int(val)} if C{val} is a non-empty string made up only of
+ digits, otherwise C{val} unchanged (this includes None and "")
+
+ """
+ if val and _SORTER_DIGIT.match(val):
+ return int(val)
+ else:
+ return val
+
+
+def _NiceSortKey(value):
+ """Extract key for sorting.
+
+ @type value: string
+ @param value: the string to build a sort key for
+ @rtype: list
+ @return: the groups matched by L{_SORTER_RE}, with all-digit groups
+ converted to integers via L{_NiceSortTryInt}
+
+ """
+ return [_NiceSortTryInt(grp)
+ for grp in _SORTER_RE.match(value).groups()]
+
+
+def NiceSort(values, key=None):
"""Sort a list of strings based on digit and non-digit groupings.
Given a list of names C{['a1', 'a10', 'a11', 'a2']} this function
or no-digits. Only the first eight such groups are considered, and
after that we just use what's left of the string.
- @type name_list: list
- @param name_list: the names to be sorted
+ @type values: list
+ @param values: the names to be sorted
+ @type key: callable or None
+ @param key: function of one argument to extract a comparison key from each
+ list element, must return string
@rtype: list
@return: a copy of the name list sorted with our algorithm
"""
- _SORTER_BASE = "(\D+|\d+)"
- _SORTER_FULL = "^%s%s?%s?%s?%s?%s?%s?%s?.*$" % (_SORTER_BASE, _SORTER_BASE,
- _SORTER_BASE, _SORTER_BASE,
- _SORTER_BASE, _SORTER_BASE,
- _SORTER_BASE, _SORTER_BASE)
- _SORTER_RE = re.compile(_SORTER_FULL)
- _SORTER_NODIGIT = re.compile("^\D*$")
- def _TryInt(val):
- """Attempts to convert a variable to integer."""
- if val is None or _SORTER_NODIGIT.match(val):
- return val
- rval = int(val)
- return rval
+ if key is None:
+ keyfunc = _NiceSortKey
+ else:
+ keyfunc = lambda value: _NiceSortKey(key(value))
- to_sort = [([_TryInt(grp) for grp in _SORTER_RE.match(name).groups()], name)
- for name in name_list]
- to_sort.sort()
- return [tup[1] for tup in to_sort]
+ return sorted(values, key=keyfunc)
def TryConvert(fn, val):
@return: True if the word is 'safe'
"""
- return bool(re.match("^[-a-zA-Z0-9._+/:%@]+$", word))
+ return bool(_SHELLPARAM_REGEX.match(word))
def BuildShellCmd(template, *args):
is always an int in MiB.
"""
- m = re.match('^([.\d]+)\s*([a-zA-Z]+)?$', str(input_string))
+ m = _PARSEUNIT_REGEX.match(str(input_string))
if not m:
raise errors.UnitParseError("Invalid format")
def TimestampForFilename():
"""Returns the current time formatted for filenames.
- The format doesn't contain colons as some shells and applications them as
- separators.
+ The format doesn't contain colons as some shells and applications treat them
+ as separators. Uses the local timezone.
"""
return time.strftime("%Y-%m-%d_%H_%M_%S")
return result
+def GetFileID(path=None, fd=None):
+ """Returns the file 'id', i.e. the dev/inode and mtime information.
+
+ Exactly one of the path to the file or the fd must be given.
+
+ @param path: the file path
+ @param fd: a file descriptor
+ @rtype: tuple
+ @return: a tuple of (device number, inode number, mtime)
+ @raise errors.ProgrammerError: if both or neither of C{path} and C{fd}
+ are given
+
+ """
+ # Exactly one of the two arguments may be non-None
+ if [path, fd].count(None) != 1:
+ raise errors.ProgrammerError("One and only one of fd/path must be given")
+
+ if fd is None:
+ st = os.stat(path)
+ else:
+ st = os.fstat(fd)
+
+ return (st.st_dev, st.st_ino, st.st_mtime)
+
+
+def VerifyFileID(fi_disk, fi_ours):
+ """Verifies that two file IDs are matching.
+
+ Differences in the inode/device are not accepted, but an older (or
+ equal) timestamp for fi_disk is accepted.
+
+ @param fi_disk: tuple (dev, inode, mtime) representing the actual
+ file data
+ @param fi_ours: tuple (dev, inode, mtime) representing the last
+ written file data
+ @rtype: boolean
+ @return: True if device and inode are equal and the mtime of fi_disk is
+ not newer than the mtime of fi_ours
+
+ """
+ (d1, i1, m1) = fi_disk
+ (d2, i2, m2) = fi_ours
+
+ return (d1, i1) == (d2, i2) and m1 <= m2
+
+
+def SafeWriteFile(file_name, file_id, **kwargs):
+ """Wrapper over L{WriteFile} that locks the target file.
+
+ By keeping the target file locked during WriteFile, we ensure that
+ cooperating writers will safely serialise access to the file. The target
+ file is created if it does not exist yet.
+
+ @type file_name: str
+ @param file_name: the target filename
+ @type file_id: tuple or None
+ @param file_id: a result from L{GetFileID}; if None, the on-disk file is
+ not checked for modifications
+ @param kwargs: passed on unchanged to L{WriteFile}
+ @return: the return value of L{WriteFile}
+ @raise errors.LockError: if C{file_id} is given and does not match the
+ file currently on disk (see L{VerifyFileID})
+
+ """
+ # This descriptor is only used for locking and fstat, never for writing
+ fd = os.open(file_name, os.O_RDONLY | os.O_CREAT)
+ try:
+ LockFile(fd)
+ if file_id is not None:
+ disk_id = GetFileID(fd=fd)
+ if not VerifyFileID(disk_id, file_id):
+ raise errors.LockError("Cannot overwrite file %s, it has been modified"
+ " since last written" % file_name)
+ return WriteFile(file_name, **kwargs)
+ finally:
+ # Always close the descriptor; presumably this also releases the lock
+ os.close(fd)
+
+
def ReadOneLineFile(file_name, strict=False):
"""Return the first non-empty line from a file.
return [i for i in seq if i not in seen and not seen.add(i)]
+def FindDuplicates(seq):
+ """Identifies duplicates in a list.
+
+ Does not preserve element order. Elements must be hashable, as they are
+ stored in sets.
+
+ @type seq: sequence
+ @param seq: Sequence with source elements
+ @rtype: list
+ @return: List of duplicate elements from seq, each listed once
+
+ """
+ dup = set()
+ seen = set()
+
+ for item in seq:
+ if item in seen:
+ # Second or later occurrence
+ dup.add(item)
+ else:
+ seen.add(item)
+
+ return list(dup)
+
+
def NormalizeAndValidateMac(mac):
"""Normalizes and check if a MAC address is valid.
# pylint: disable-msg=W0212
# yes, we really want os._exit
+ # TODO: do another attempt to merge Daemonize and StartDaemon, or at
+ # least abstract the pipe functionality between them
+
+ # Create pipe for sending error messages
+ (rpipe, wpipe) = os.pipe()
+
# this might fail
pid = os.fork()
if (pid == 0): # The first child.
# this might fail
pid = os.fork() # Fork a second child.
if (pid == 0): # The second child.
- pass # nothing special to do in the child
+ _CloseFDNoErr(rpipe)
else:
# exit() or _exit()? See below.
os._exit(0) # Exit parent (the first child) of the second child.
else:
- os._exit(0) # Exit parent of the first child.
+ _CloseFDNoErr(wpipe)
+ # Wait for daemon to be started (or an error message to
+ # arrive) and read up to 100 KB as an error message
+ errormsg = RetryOnSignal(os.read, rpipe, 100 * 1024)
+ if errormsg:
+ sys.stderr.write("Error when starting daemon process: %r\n" % errormsg)
+ rcode = 1
+ else:
+ rcode = 0
+ os._exit(rcode) # Exit parent of the first child.
- for fd in range(3):
- _CloseFDNoErr(fd)
- i = os.open("/dev/null", os.O_RDONLY) # stdin
- assert i == 0, "Can't close/reopen stdin"
- i = os.open(logfile, os.O_WRONLY|os.O_CREAT|os.O_APPEND, 0600) # stdout
- assert i == 1, "Can't close/reopen stdout"
- # Duplicate standard output to standard error.
- os.dup2(1, 2)
- return 0
+ SetupDaemonFDs(logfile, None)
+ return wpipe
def DaemonPidFileName(name):
return True
-def WritePidFile(name):
+def WritePidFile(pidfile):
"""Write the current process pidfile.
- The file will be written to L{constants.RUN_GANETI_DIR}I{/name.pid}
-
- @type name: str
- @param name: the daemon name to use
- @raise errors.GenericError: if the pid file already exists and
+ @type pidfile: string
+ @param pidfile: the path to the file to be written
+ @raise errors.LockError: if the pid file already exists and
points to a live process
+ @rtype: int
+ @return: the file descriptor of the lock file; do not close this unless
+ you want to unlock the pid file
"""
- pid = os.getpid()
- pidfilename = DaemonPidFileName(name)
- if IsProcessAlive(ReadPidFile(pidfilename)):
- raise errors.GenericError("%s contains a live process" % pidfilename)
+ # We don't rename nor truncate the file to not drop locks under
+ # existing processes
+ fd_pidfile = os.open(pidfile, os.O_WRONLY | os.O_CREAT, 0600)
+
+ # Lock the PID file (and fail if not possible to do so). Any code
+ # wanting to send a signal to the daemon should try to lock the PID
+ # file before reading it. If acquiring the lock succeeds, the daemon is
+ # no longer running and the signal should not be sent.
+ LockFile(fd_pidfile)
+
+ os.write(fd_pidfile, "%d\n" % os.getpid())
- WriteFile(pidfilename, data="%d\n" % pid)
+ return fd_pidfile
def RemovePidFile(name):
return rows[-lines:]
-def FormatTimestampWithTZ(secs):
- """Formats a Unix timestamp with the local timezone.
-
- """
- return time.strftime("%F %T %Z", time.gmtime(secs))
-
-
def _ParseAsn1Generalizedtime(value):
"""Parses an ASN1 GENERALIZEDTIME timestamp as used by pyOpenSSL.
@type value: string
@param value: ASN1 GENERALIZEDTIME timestamp
+ @return: Seconds since the Epoch (1970-01-01 00:00:00 UTC)
"""
- m = re.match(r"^(\d+)([-+]\d\d)(\d\d)$", value)
+ m = _ASN1_TIME_REGEX.match(value)
if m:
# We have an offset
asn1time = m.group(1)
if not_before is not None and not_after is not None:
msg += (" (valid from %s to %s)" %
- (FormatTimestampWithTZ(not_before),
- FormatTimestampWithTZ(not_after)))
+ (FormatTime(not_before), FormatTime(not_after)))
elif not_before is not None:
- msg += " (valid from %s)" % FormatTimestampWithTZ(not_before)
+ msg += " (valid from %s)" % FormatTime(not_before)
elif not_after is not None:
- msg += " (valid until %s)" % FormatTimestampWithTZ(not_after)
+ msg += " (valid until %s)" % FormatTime(not_after)
return (CERT_ERROR, msg)
elif not_before is not None and not_before > now:
return (CERT_WARNING,
"Certificate not yet valid (valid from %s)" %
- FormatTimestampWithTZ(not_before))
+ FormatTime(not_before))
elif not_after is not None:
remaining_days = int((not_after - now) / (24 * 3600))
return ", ".join([str(val) for val in names])
+def FindMatch(data, name):
+ """Tries to find an item in a dictionary matching a name.
+
+ Callers have to ensure the data names aren't contradictory (e.g. a regexp
+ that matches a string). If the name isn't a direct key, the keys having a
+ C{match} attribute (regular expression objects) are matched against it in
+ dictionary iteration order and the first one matching is used.
+
+ @type data: dict
+ @param data: Dictionary containing data
+ @type name: string
+ @param name: Name to look for
+ @rtype: tuple; (value in dictionary, matched groups as list)
+ @return: the tuple described above (with an empty group list for a direct
+ key hit), or None if nothing matches
+
+ """
+ # Direct key lookup takes precedence over regular expressions
+ if name in data:
+ return (data[name], [])
+
+ for key, value in data.items():
+ # Regex objects
+ if hasattr(key, "match"):
+ m = key.match(name)
+ if m:
+ return (value, list(m.groups()))
+
+ return None
+
+
def BytesToMebibyte(value):
"""Converts bytes to mebibytes.
"""Formats a time value.
@type val: float or None
- @param val: the timestamp as returned by time.time()
+ @param val: Timestamp as returned by time.time() (seconds since Epoch,
+ 1970-01-01 00:00:00 UTC)
@return: a string value or N/A if we don't have a valid timestamp
"""
"""
return [val for val in items if not self.Matches(val)]
+
+
+class RunningTimeout(object):
+ """Class to calculate remaining timeout when doing several operations.
+
+ The clock is not started when the object is created but on the first
+ call to L{Remaining}.
+
+ """
+ __slots__ = [
+ "_allow_negative",
+ "_start_time",
+ "_time_fn",
+ "_timeout",
+ ]
+
+ def __init__(self, timeout, allow_negative, _time_fn=time.time):
+ """Initializes this class.
+
+ @type timeout: float or None
+ @param timeout: Timeout duration; None makes L{Remaining} always
+ return None
+ @type allow_negative: bool
+ @param allow_negative: Whether to return values below zero
+ @param _time_fn: Time function for unittests
+ @raise ValueError: if C{timeout} is negative
+
+ """
+ object.__init__(self)
+
+ if timeout is not None and timeout < 0.0:
+ raise ValueError("Timeout must not be negative")
+
+ self._timeout = timeout
+ self._allow_negative = allow_negative
+ self._time_fn = _time_fn
+
+ # Set lazily by the first call to Remaining()
+ self._start_time = None
+
+ def Remaining(self):
+ """Returns the remaining timeout.
+
+ @rtype: float or None
+ @return: None if no timeout was configured, otherwise the time left
+ counted from the first call to this method; clamped to 0.0 unless
+ negative values were allowed at construction
+
+ """
+ if self._timeout is None:
+ return None
+
+ # Get start time on first calculation
+ if self._start_time is None:
+ self._start_time = self._time_fn()
+
+ # Calculate remaining time
+ remaining_timeout = self._start_time + self._timeout - self._time_fn()
+
+ if not self._allow_negative:
+ # Ensure timeout is always >= 0
+ return max(0.0, remaining_timeout)
+
+ return remaining_timeout