4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Utility functions for processes.
35 from cStringIO import StringIO
37 from ganeti import errors
38 from ganeti import constants
40 from ganeti.utils import retry as utils_retry
41 from ganeti.utils import wrapper as utils_wrapper
42 from ganeti.utils import text as utils_text
43 from ganeti.utils import io as utils_io
44 from ganeti.utils import algo as utils_algo
47 #: when set to True, L{RunCmd} is disabled
52 _TIMEOUT_KILL) = range(3)
56 """Disables the use of fork(2).
59 global _no_fork # pylint: disable-msg=W0603
class RunResult(object):
  """Holds the result of running external programs.

  @type exit_code: int or None
  @ivar exit_code: the exit code of the program, or None (if the program
      did not terminate by exiting)
  @type signal: int or None
  @ivar signal: the signal that caused the program to finish, or None
      (if the program wasn't terminated by a signal)
  @ivar stdout: the standard output of the program
  @ivar stderr: the standard error of the program
  @type failed: boolean
  @ivar failed: True in case the program was
      terminated by a signal or exited with a non-zero exit code
  @ivar fail_reason: a string detailing the termination reason, or None
      if the program did not fail
  @ivar cmd: the command that was run, as given to the constructor

  """
  __slots__ = ["exit_code", "signal", "stdout", "stderr",
               "failed", "fail_reason", "cmd"]

  def __init__(self, exit_code, signal_, stdout, stderr, cmd, timeout_action,
               timeout):
    """Stores the outcome of a command and derives the failure details.

    @param exit_code: exit code of the program, or None
    @param signal_: signal which terminated the program, or None
    @param stdout: captured standard output
    @param stderr: captured standard error
    @param cmd: the command that was run
    @param timeout_action: one of the module's C{_TIMEOUT_*} values
    @param timeout: the timeout (seconds) that was in effect; only used for
        the failure message when a timeout action was taken

    """
    self.cmd = cmd
    self.exit_code = exit_code
    self.signal = signal_
    self.stdout = stdout
    self.stderr = stderr
    self.failed = (signal_ is not None or exit_code != 0)

    fail_msgs = []
    if self.signal is not None:
      fail_msgs.append("terminated by signal %s" % self.signal)
    elif self.exit_code is not None:
      fail_msgs.append("exited with exit code %s" % self.exit_code)
    else:
      fail_msgs.append("unable to determine termination reason")

    if timeout_action == _TIMEOUT_TERM:
      fail_msgs.append("terminated after timeout of %.2f seconds" % timeout)
    elif timeout_action == _TIMEOUT_KILL:
      fail_msgs.append(("force termination after timeout of %.2f seconds"
                        " and linger for another %.2f seconds") %
                       (timeout, constants.CHILD_LINGER_TIMEOUT))

    if fail_msgs and self.failed:
      self.fail_reason = utils_text.CommaJoin(fail_msgs)
    else:
      # With __slots__ an unassigned attribute raises AttributeError on
      # access, hence always give it a value
      self.fail_reason = None

    if self.failed:
      logging.debug("Command '%s' failed (%s); output: %s",
                    self.cmd, self.fail_reason, self.output)

  def _GetOutput(self):
    """Returns the combined stdout and stderr for easier usage.

    """
    return self.stdout + self.stderr

  output = property(_GetOutput, None, None, "Return full output")
127 def _BuildCmdEnvironment(env, reset):
128 """Builds the environment for an external program.
134 cmd_env = os.environ.copy()
135 cmd_env["LC_ALL"] = "C"
143 def RunCmd(cmd, env=None, output=None, cwd="/", reset_env=False,
144 interactive=False, timeout=None, noclose_fds=None):
145 """Execute a (shell) command.
147 The command should not read from its standard input, as it will be
150 @type cmd: string or list
151 @param cmd: Command to run
153 @param env: Additional environment variables
155 @param output: if desired, the output of the command can be
156 saved in a file instead of the RunResult instance; this
157 parameter denotes the file name (if not None)
159 @param cwd: if specified, will be used as the working
160 directory for the command; the default will be /
161 @type reset_env: boolean
162 @param reset_env: whether to reset or keep the default os environment
163 @type interactive: boolean
164 @param interactive: whether we pipe stdin, stdout and stderr
165 (default behaviour) or run the command interactive
167 @param timeout: If not None, timeout in seconds until child process gets
169 @type noclose_fds: list
170 @param noclose_fds: list of additional (fd >=3) file descriptors to leave
171 open for the child process
173 @return: RunResult instance
174 @raise errors.ProgrammerError: if we call this when forks are disabled
178 raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled")
180 if output and interactive:
181 raise errors.ProgrammerError("Parameters 'output' and 'interactive' can"
182 " not be provided at the same time")
184 if isinstance(cmd, basestring):
188 cmd = [str(val) for val in cmd]
189 strcmd = utils_text.ShellQuoteArgs(cmd)
193 logging.debug("RunCmd %s, output file '%s'", strcmd, output)
195 logging.debug("RunCmd %s", strcmd)
197 cmd_env = _BuildCmdEnvironment(env, reset_env)
201 out, err, status, timeout_action = _RunCmdPipe(cmd, cmd_env, shell, cwd,
202 interactive, timeout,
205 timeout_action = _TIMEOUT_NONE
206 status = _RunCmdFile(cmd, cmd_env, shell, output, cwd, noclose_fds)
209 if err.errno == errno.ENOENT:
210 raise errors.OpExecError("Can't execute '%s': not found (%s)" %
222 return RunResult(exitcode, signal_, out, err, strcmd, timeout_action, timeout)
225 def SetupDaemonEnv(cwd="/", umask=077):
226 """Setup a daemon's environment.
228 This should be called between the first and second fork, due to
231 @param cwd: the directory to which to chdir
232 @param umask: the umask to setup
240 def SetupDaemonFDs(output_file, output_fd):
241 """Setups up a daemon's file descriptors.
243 @param output_file: if not None, the file to which to redirect
245 @param output_fd: if not None, the file descriptor for stdout/stderr
248 # check that at most one is defined
249 assert [output_file, output_fd].count(None) >= 1
251 # Open /dev/null (read-only, only for stdin)
252 devnull_fd = os.open(os.devnull, os.O_RDONLY)
254 if output_fd is not None:
256 elif output_file is not None:
259 output_fd = os.open(output_file,
260 os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600)
261 except EnvironmentError, err:
262 raise Exception("Opening output file failed: %s" % err)
264 output_fd = os.open(os.devnull, os.O_WRONLY)
266 # Redirect standard I/O
267 os.dup2(devnull_fd, 0)
268 os.dup2(output_fd, 1)
269 os.dup2(output_fd, 2)
272 def StartDaemon(cmd, env=None, cwd="/", output=None, output_fd=None,
274 """Start a daemon process after forking twice.
276 @type cmd: string or list
277 @param cmd: Command to run
279 @param env: Additional environment variables
281 @param cwd: Working directory for the program
283 @param output: Path to file in which to save the output
285 @param output_fd: File descriptor for output
286 @type pidfile: string
287 @param pidfile: Process ID file
289 @return: Daemon process ID
290 @raise errors.ProgrammerError: if we call this when forks are disabled
294 raise errors.ProgrammerError("utils.StartDaemon() called with fork()"
297 if output and not (bool(output) ^ (output_fd is not None)):
298 raise errors.ProgrammerError("Only one of 'output' and 'output_fd' can be"
301 if isinstance(cmd, basestring):
302 cmd = ["/bin/sh", "-c", cmd]
304 strcmd = utils_text.ShellQuoteArgs(cmd)
307 logging.debug("StartDaemon %s, output file '%s'", strcmd, output)
309 logging.debug("StartDaemon %s", strcmd)
311 cmd_env = _BuildCmdEnvironment(env, False)
313 # Create pipe for sending PID back
314 (pidpipe_read, pidpipe_write) = os.pipe()
317 # Create pipe for sending error messages
318 (errpipe_read, errpipe_write) = os.pipe()
325 # Child process, won't return
326 _StartDaemonChild(errpipe_read, errpipe_write,
327 pidpipe_read, pidpipe_write,
329 output, output_fd, pidfile)
331 # Well, maybe child process failed
332 os._exit(1) # pylint: disable-msg=W0212
334 utils_wrapper.CloseFdNoError(errpipe_write)
336 # Wait for daemon to be started (or an error message to
337 # arrive) and read up to 100 KB as an error message
338 errormsg = utils_wrapper.RetryOnSignal(os.read, errpipe_read,
341 utils_wrapper.CloseFdNoError(errpipe_read)
343 utils_wrapper.CloseFdNoError(pidpipe_write)
345 # Read up to 128 bytes for PID
346 pidtext = utils_wrapper.RetryOnSignal(os.read, pidpipe_read, 128)
348 utils_wrapper.CloseFdNoError(pidpipe_read)
350 # Try to avoid zombies by waiting for child process
357 raise errors.OpExecError("Error when starting daemon process: %r" %
362 except (ValueError, TypeError), err:
363 raise errors.OpExecError("Error while trying to parse PID %r: %s" %
367 def _StartDaemonChild(errpipe_read, errpipe_write,
368 pidpipe_read, pidpipe_write,
370 output, fd_output, pidfile):
371 """Child process for starting daemon.
375 # Close parent's side
376 utils_wrapper.CloseFdNoError(errpipe_read)
377 utils_wrapper.CloseFdNoError(pidpipe_read)
379 # First child process
382 # And fork for the second time
385 # Exit first child process
386 os._exit(0) # pylint: disable-msg=W0212
388 # Make sure pipe is closed on execv* (and thereby notifies
390 utils_wrapper.SetCloseOnExecFlag(errpipe_write, True)
392 # List of file descriptors to be left open
393 noclose_fds = [errpipe_write]
397 fd_pidfile = utils_io.WritePidFile(pidfile)
399 # Keeping the file open to hold the lock
400 noclose_fds.append(fd_pidfile)
402 utils_wrapper.SetCloseOnExecFlag(fd_pidfile, False)
406 SetupDaemonFDs(output, fd_output)
408 # Send daemon PID to parent
409 utils_wrapper.RetryOnSignal(os.write, pidpipe_write, str(os.getpid()))
411 # Close all file descriptors except stdio and error message pipe
412 CloseFDs(noclose_fds=noclose_fds)
414 # Change working directory
418 os.execvp(args[0], args)
420 os.execvpe(args[0], args, env)
421 except: # pylint: disable-msg=W0702
423 # Report errors to original process
424 WriteErrorToFD(errpipe_write, str(sys.exc_info()[1]))
425 except: # pylint: disable-msg=W0702
426 # Ignore errors in error handling
429 os._exit(1) # pylint: disable-msg=W0212
def WriteErrorToFD(fd, err):
  """Possibly write an error message to a fd.

  @type fd: None or int (file descriptor)
  @param fd: if not None, the error will be written to this fd
  @param err: string, the error message; an empty message is replaced by
      a placeholder text

  """
  if fd is None:
    return

  if not err:
    err = "<unknown error>"

  utils_wrapper.RetryOnSignal(os.write, fd, err)
449 def _CheckIfAlive(child):
450 """Raises L{utils_retry.RetryAgain} if child is still alive.
452 @raises utils_retry.RetryAgain: If child is still alive
455 if child.poll() is None:
456 raise utils_retry.RetryAgain()
def _WaitForProcess(child, timeout):
  """Waits for the child to terminate or until we reach timeout.

  @param child: the child process, as accepted by L{_CheckIfAlive}
  @param timeout: maximum time to wait; negative values are treated as
      zero

  """
  try:
    utils_retry.Retry(_CheckIfAlive, (1.0, 1.2, 5.0), max(0, timeout),
                      args=[child])
  except utils_retry.RetryTimeout:
    # Running into the timeout is not an error here; callers check the
    # child's state themselves afterwards
    pass
470 def _RunCmdPipe(cmd, env, via_shell, cwd, interactive, timeout, noclose_fds,
471 _linger_timeout=constants.CHILD_LINGER_TIMEOUT):
472 """Run a command and return its output.
474 @type cmd: string or list
475 @param cmd: Command to run
477 @param env: The environment to use
478 @type via_shell: bool
479 @param via_shell: if we should run via the shell
481 @param cwd: the working directory for the program
482 @type interactive: boolean
483 @param interactive: Run command interactive (without piping)
485 @param timeout: Timeout after which the program gets terminated
486 @type noclose_fds: list
487 @param noclose_fds: list of additional (fd >=3) file descriptors to leave
488 open for the child process
490 @return: (out, err, status)
493 poller = select.poll()
495 stderr = subprocess.PIPE
496 stdout = subprocess.PIPE
497 stdin = subprocess.PIPE
500 stderr = stdout = stdin = None
503 preexec_fn = lambda: CloseFDs(noclose_fds)
509 child = subprocess.Popen(cmd, shell=via_shell,
513 close_fds=close_fds, env=env,
515 preexec_fn=preexec_fn)
520 linger_timeout = None
525 poll_timeout = utils_algo.RunningTimeout(timeout, True).Remaining
527 msg_timeout = ("Command %s (%d) run into execution timeout, terminating" %
529 msg_linger = ("Command %s (%d) run into linger timeout, killing" %
532 timeout_action = _TIMEOUT_NONE
536 poller.register(child.stdout, select.POLLIN)
537 poller.register(child.stderr, select.POLLIN)
539 child.stdout.fileno(): (out, child.stdout),
540 child.stderr.fileno(): (err, child.stderr),
543 utils_wrapper.SetNonblockFlag(fd, True)
547 pt = poll_timeout() * 1000
549 if linger_timeout is None:
550 logging.warning(msg_timeout)
551 if child.poll() is None:
552 timeout_action = _TIMEOUT_TERM
553 utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid,
556 utils_algo.RunningTimeout(_linger_timeout, True).Remaining
557 pt = linger_timeout() * 1000
563 pollresult = utils_wrapper.RetryOnSignal(poller.poll, pt)
565 for fd, event in pollresult:
566 if event & select.POLLIN or event & select.POLLPRI:
567 data = fdmap[fd][1].read()
568 # no data from read signifies EOF (the same as POLLHUP)
570 poller.unregister(fd)
573 fdmap[fd][0].write(data)
574 if (event & select.POLLNVAL or event & select.POLLHUP or
575 event & select.POLLERR):
576 poller.unregister(fd)
579 if timeout is not None:
580 assert callable(poll_timeout)
582 # We have no I/O left but it might still run
583 if child.poll() is None:
584 _WaitForProcess(child, poll_timeout())
586 # Terminate if still alive after timeout
587 if child.poll() is None:
588 if linger_timeout is None:
589 logging.warning(msg_timeout)
590 timeout_action = _TIMEOUT_TERM
591 utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGTERM)
594 lt = linger_timeout()
595 _WaitForProcess(child, lt)
597 # Okay, still alive after timeout and linger timeout? Kill it!
598 if child.poll() is None:
599 timeout_action = _TIMEOUT_KILL
600 logging.warning(msg_linger)
601 utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGKILL)
606 status = child.wait()
607 return out, err, status, timeout_action
610 def _RunCmdFile(cmd, env, via_shell, output, cwd, noclose_fds):
611 """Run a command and save its output to a file.
613 @type cmd: string or list
614 @param cmd: Command to run
616 @param env: The environment to use
617 @type via_shell: bool
618 @param via_shell: if we should run via the shell
620 @param output: the filename in which to save the output
622 @param cwd: the working directory for the program
623 @type noclose_fds: list
624 @param noclose_fds: list of additional (fd >=3) file descriptors to leave
625 open for the child process
627 @return: the exit status
630 fh = open(output, "a")
633 preexec_fn = lambda: CloseFDs(noclose_fds + [fh.fileno()])
640 child = subprocess.Popen(cmd, shell=via_shell,
641 stderr=subprocess.STDOUT,
643 stdin=subprocess.PIPE,
644 close_fds=close_fds, env=env,
646 preexec_fn=preexec_fn)
649 status = child.wait()
655 def RunParts(dir_name, env=None, reset_env=False):
656 """Run Scripts or programs in a directory
658 @type dir_name: string
659 @param dir_name: absolute path to a directory
661 @param env: The environment to use
662 @type reset_env: boolean
663 @param reset_env: whether to reset or keep the default os environment
664 @rtype: list of tuples
665 @return: list of (name, (one of RUNDIR_STATUS), RunResult)
671 dir_contents = utils_io.ListVisibleFiles(dir_name)
673 logging.warning("RunParts: skipping %s (cannot list: %s)", dir_name, err)
676 for relname in sorted(dir_contents):
677 fname = utils_io.PathJoin(dir_name, relname)
678 if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
679 constants.EXT_PLUGIN_MASK.match(relname) is not None):
680 rr.append((relname, constants.RUNPARTS_SKIP, None))
683 result = RunCmd([fname], env=env, reset_env=reset_env)
684 except Exception, err: # pylint: disable-msg=W0703
685 rr.append((relname, constants.RUNPARTS_ERR, str(err)))
687 rr.append((relname, constants.RUNPARTS_RUN, result))
692 def _GetProcStatusPath(pid):
693 """Returns the path for a PID's proc status file.
696 @param pid: Process ID
700 return "/proc/%d/status" % pid
703 def IsProcessAlive(pid):
704 """Check if a given pid exists on the system.
706 @note: zombie status is not handled, so zombie processes
707 will be returned as alive
709 @param pid: the process ID to check
711 @return: True if the process exists
718 except EnvironmentError, err:
719 if err.errno in (errno.ENOENT, errno.ENOTDIR):
721 elif err.errno == errno.EINVAL:
722 raise utils_retry.RetryAgain(err)
725 assert isinstance(pid, int), "pid must be an integer"
729 # /proc in a multiprocessor environment can have strange behaviors.
730 # Retry the os.stat a few times until we get a good result.
732 return utils_retry.Retry(_TryStat, (0.01, 1.5, 0.1), 0.5,
733 args=[_GetProcStatusPath(pid)])
734 except utils_retry.RetryTimeout, err:
738 def _ParseSigsetT(sigset):
739 """Parse a rendered sigset_t value.
741 This is the opposite of the Linux kernel's fs/proc/array.c:render_sigset_t
745 @param sigset: Rendered signal set from /proc/$pid/status
747 @return: Set of all enabled signal numbers
753 for ch in reversed(sigset):
756 # The following could be done in a loop, but it's easier to read and
757 # understand in the unrolled form
759 result.add(signum + 1)
761 result.add(signum + 2)
763 result.add(signum + 3)
765 result.add(signum + 4)
772 def _GetProcStatusField(pstatus, field):
773 """Retrieves a field from the contents of a proc status file.
775 @type pstatus: string
776 @param pstatus: Contents of /proc/$pid/status
778 @param field: Name of field whose value should be returned
782 for line in pstatus.splitlines():
783 parts = line.split(":", 1)
785 if len(parts) < 2 or parts[0] != field:
788 return parts[1].strip()
793 def IsProcessHandlingSignal(pid, signum, status_path=None):
794 """Checks whether a process is handling a signal.
797 @param pid: Process ID
799 @param signum: Signal number
803 if status_path is None:
804 status_path = _GetProcStatusPath(pid)
807 proc_status = utils_io.ReadFile(status_path)
808 except EnvironmentError, err:
809 # In at least one case, reading /proc/$pid/status failed with ESRCH.
810 if err.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL, errno.ESRCH):
814 sigcgt = _GetProcStatusField(proc_status, "SigCgt")
816 raise RuntimeError("%s is missing 'SigCgt' field" % status_path)
818 # Now check whether signal is handled
819 return signum in _ParseSigsetT(sigcgt)
822 def Daemonize(logfile):
823 """Daemonize the current process.
825 This detaches the current process from the controlling terminal and
826 runs it in the background as a daemon.
829 @param logfile: the logfile to which we should redirect stdout/stderr
831 @return: the value zero
834 # pylint: disable-msg=W0212
835 # yes, we really want os._exit
837 # TODO: do another attempt to merge Daemonize and StartDaemon, or at
838 # least abstract the pipe functionality between them
840 # Create pipe for sending error messages
841 (rpipe, wpipe) = os.pipe()
845 if (pid == 0): # The first child.
849 pid = os.fork() # Fork a second child.
850 if (pid == 0): # The second child.
851 utils_wrapper.CloseFdNoError(rpipe)
853 # exit() or _exit()? See below.
854 os._exit(0) # Exit parent (the first child) of the second child.
856 utils_wrapper.CloseFdNoError(wpipe)
857 # Wait for daemon to be started (or an error message to
858 # arrive) and read up to 100 KB as an error message
859 errormsg = utils_wrapper.RetryOnSignal(os.read, rpipe, 100 * 1024)
861 sys.stderr.write("Error when starting daemon process: %r\n" % errormsg)
865 os._exit(rcode) # Exit parent of the first child.
867 SetupDaemonFDs(logfile, None)
871 def KillProcess(pid, signal_=signal.SIGTERM, timeout=30,
873 """Kill a process given by its pid.
876 @param pid: The PID to terminate.
878 @param signal_: The signal to send, by default SIGTERM
880 @param timeout: The timeout after which, if the process is still alive,
881 a SIGKILL will be sent. If not positive, no such checking
883 @type waitpid: boolean
884 @param waitpid: If true, we should waitpid on this process after
885 sending signals, since it's our own child and otherwise it
886 would remain as zombie
889 def _helper(pid, signal_, wait):
890 """Simple helper to encapsulate the kill/waitpid sequence"""
891 if utils_wrapper.IgnoreProcessNotFound(os.kill, pid, signal_) and wait:
893 os.waitpid(pid, os.WNOHANG)
898 # kill with pid=0 == suicide
899 raise errors.ProgrammerError("Invalid pid given '%s'" % pid)
901 if not IsProcessAlive(pid):
904 _helper(pid, signal_, waitpid)
910 if not IsProcessAlive(pid):
914 (result_pid, _) = os.waitpid(pid, os.WNOHANG)
916 raise utils_retry.RetryAgain()
921 raise utils_retry.RetryAgain()
924 # Wait up to $timeout seconds
925 utils_retry.Retry(_CheckProcess, (0.01, 1.5, 0.1), timeout)
926 except utils_retry.RetryTimeout:
929 if IsProcessAlive(pid):
930 # Kill process if it's still alive
931 _helper(pid, signal.SIGKILL, waitpid)
934 def RunInSeparateProcess(fn, *args):
935 """Runs a function in a separate process.
937 Note: Only boolean return values are supported.
940 @param fn: Function to be called
942 @return: Function's result
949 # In case the function uses temporary files
950 utils_wrapper.ResetTempfileModule()
953 result = int(bool(fn(*args)))
954 assert result in (0, 1)
955 except: # pylint: disable-msg=W0702
956 logging.exception("Error while calling function in separate process")
957 # 0 and 1 are reserved for the return value
960 os._exit(result) # pylint: disable-msg=W0212
964 # Avoid zombies and check exit code
965 (_, status) = os.waitpid(pid, 0)
967 if os.WIFSIGNALED(status):
969 signum = os.WTERMSIG(status)
971 exitcode = os.WEXITSTATUS(status)
974 if not (exitcode in (0, 1) and signum is None):
975 raise errors.GenericError("Child program failed (code=%s, signal=%s)" %
978 return bool(exitcode)
981 def CloseFDs(noclose_fds=None):
982 """Close file descriptors.
984 This closes all file descriptors above 2 (i.e. except
987 @type noclose_fds: list or None
988 @param noclose_fds: if given, it denotes a list of file descriptor
989 that should not be closed
992 # Default maximum for the number of available file descriptors.
993 if 'SC_OPEN_MAX' in os.sysconf_names:
995 MAXFD = os.sysconf('SC_OPEN_MAX')
1003 maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
1004 if (maxfd == resource.RLIM_INFINITY):
1007 # Iterate through and close all file descriptors (except the standard ones)
1008 for fd in range(3, maxfd):
1009 if noclose_fds and fd in noclose_fds:
1011 utils_wrapper.CloseFdNoError(fd)