Statistics
| Branch: | Tag: | Revision:

root / lib / utils / process.py @ eee68d57

History | View | Annotate | Download (28.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Utility functions for processes.
22

23
"""
24

    
25

    
26
import os
27
import sys
28
import subprocess
29
import errno
30
import select
31
import logging
32
import signal
33
import resource
34

    
35
from cStringIO import StringIO
36

    
37
from ganeti import errors
38
from ganeti import constants
39
from ganeti import compat
40

    
41
from ganeti.utils import retry as utils_retry
42
from ganeti.utils import wrapper as utils_wrapper
43
from ganeti.utils import text as utils_text
44
from ganeti.utils import io as utils_io
45
from ganeti.utils import algo as utils_algo
46

    
47

    
48
#: Module-wide switch; once it is True, L{RunCmd} refuses to run
_no_fork = False

# How a child process was treated with regard to its timeout: not at all,
# sent SIGTERM after the timeout, or sent SIGKILL after the linger period
_TIMEOUT_NONE, _TIMEOUT_TERM, _TIMEOUT_KILL = range(3)
54

    
55

    
56
def DisableFork():
  """Forbids further use of fork(2) through this module.

  Sets the module-level C{_no_fork} flag; functions consulting that flag
  raise an error instead of spawning a process afterwards.

  """
  # The flag is module state, hence the global statement
  global _no_fork # pylint: disable-msg=W0603

  _no_fork = True
63

    
64

    
65
class RunResult(object):
  """Holds the result of running external programs.

  @type exit_code: int or None
  @ivar exit_code: the exit code of the program, or None (if the program
      didn't exit())
  @type signal: int or None
  @ivar signal: the signal that caused the program to finish, or None
      (if the program wasn't terminated by a signal)
  @type stdout: str
  @ivar stdout: the standard output of the program
  @type stderr: str
  @ivar stderr: the standard error of the program
  @type failed: boolean
  @ivar failed: True in case the program was
      terminated by a signal or exited with a non-zero exit code
  @type fail_reason: str or None
  @ivar fail_reason: a string detailing the termination reason, or None if
      the program did not fail
  @ivar cmd: the command, as passed to the constructor
  @ivar output: read-only property, C{stdout} followed by C{stderr}

  """
  __slots__ = ["exit_code", "signal", "stdout", "stderr",
               "failed", "fail_reason", "cmd"]


  def __init__(self, exit_code, signal_, stdout, stderr, cmd, timeout_action,
               timeout):
    """Stores the raw results and derives the failure state from them.

    @param exit_code: exit code of the program or None
    @param signal_: number of the terminating signal or None
    @param stdout: captured standard output
    @param stderr: captured standard error
    @param cmd: the command that was run (used for logging)
    @param timeout_action: one of the module's C{_TIMEOUT_*} values
    @param timeout: the timeout that was in effect; only formatted into the
        failure reason if C{timeout_action} reports a timeout

    """
    self.cmd = cmd
    self.exit_code = exit_code
    self.signal = signal_
    self.stdout = stdout
    self.stderr = stderr
    self.failed = (signal_ is not None or exit_code != 0)

    fail_msgs = []
    if self.signal is not None:
      fail_msgs.append("terminated by signal %s" % self.signal)
    elif self.exit_code is not None:
      fail_msgs.append("exited with exit code %s" % self.exit_code)
    else:
      fail_msgs.append("unable to determine termination reason")

    if timeout_action == _TIMEOUT_TERM:
      fail_msgs.append("terminated after timeout of %.2f seconds" % timeout)
    elif timeout_action == _TIMEOUT_KILL:
      fail_msgs.append(("force termination after timeout of %.2f seconds"
                        " and linger for another %.2f seconds") %
                       (timeout, constants.CHILD_LINGER_TIMEOUT))

    if fail_msgs and self.failed:
      self.fail_reason = utils_text.CommaJoin(fail_msgs)
    else:
      # The class uses __slots__, so a slot that is never assigned raises
      # AttributeError on access; always give it a value
      self.fail_reason = None

    if self.failed:
      logging.debug("Command '%s' failed (%s); output: %s",
                    self.cmd, self.fail_reason, self.output)

  def _GetOutput(self):
    """Returns the combined stdout and stderr for easier usage.

    """
    return self.stdout + self.stderr

  output = property(_GetOutput, None, None, "Return full output")
126

    
127

    
128
def _BuildCmdEnvironment(env, reset):
129
  """Builds the environment for an external program.
130

131
  """
132
  if reset:
133
    cmd_env = {}
134
  else:
135
    cmd_env = os.environ.copy()
136
    cmd_env["LC_ALL"] = "C"
137

    
138
  if env is not None:
139
    cmd_env.update(env)
140

    
141
  return cmd_env
142

    
143

    
144
def RunCmd(cmd, env=None, output=None, cwd="/", reset_env=False,
145
           interactive=False, timeout=None, noclose_fds=None,
146
           _postfork_fn=None):
147
  """Execute a (shell) command.
148

149
  The command should not read from its standard input, as it will be
150
  closed.
151

152
  @type cmd: string or list
153
  @param cmd: Command to run
154
  @type env: dict
155
  @param env: Additional environment variables
156
  @type output: str
157
  @param output: if desired, the output of the command can be
158
      saved in a file instead of the RunResult instance; this
159
      parameter denotes the file name (if not None)
160
  @type cwd: string
161
  @param cwd: if specified, will be used as the working
162
      directory for the command; the default will be /
163
  @type reset_env: boolean
164
  @param reset_env: whether to reset or keep the default os environment
165
  @type interactive: boolean
166
  @param interactive: whether we pipe stdin, stdout and stderr
167
                      (default behaviour) or run the command interactive
168
  @type timeout: int
169
  @param timeout: If not None, timeout in seconds until child process gets
170
                  killed
171
  @type noclose_fds: list
172
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
173
                      open for the child process
174
  @param _postfork_fn: Callback run after fork but before timeout (unittest)
175
  @rtype: L{RunResult}
176
  @return: RunResult instance
177
  @raise errors.ProgrammerError: if we call this when forks are disabled
178

179
  """
180
  if _no_fork:
181
    raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled")
182

    
183
  if output and interactive:
184
    raise errors.ProgrammerError("Parameters 'output' and 'interactive' can"
185
                                 " not be provided at the same time")
186

    
187
  if isinstance(cmd, basestring):
188
    strcmd = cmd
189
    shell = True
190
  else:
191
    cmd = [str(val) for val in cmd]
192
    strcmd = utils_text.ShellQuoteArgs(cmd)
193
    shell = False
194

    
195
  if output:
196
    logging.debug("RunCmd %s, output file '%s'", strcmd, output)
197
  else:
198
    logging.debug("RunCmd %s", strcmd)
199

    
200
  cmd_env = _BuildCmdEnvironment(env, reset_env)
201

    
202
  try:
203
    if output is None:
204
      out, err, status, timeout_action = _RunCmdPipe(cmd, cmd_env, shell, cwd,
205
                                                     interactive, timeout,
206
                                                     noclose_fds,
207
                                                     _postfork_fn=_postfork_fn)
208
    else:
209
      assert _postfork_fn is None, \
210
          "_postfork_fn not supported if output provided"
211
      timeout_action = _TIMEOUT_NONE
212
      status = _RunCmdFile(cmd, cmd_env, shell, output, cwd, noclose_fds)
213
      out = err = ""
214
  except OSError, err:
215
    if err.errno == errno.ENOENT:
216
      raise errors.OpExecError("Can't execute '%s': not found (%s)" %
217
                               (strcmd, err))
218
    else:
219
      raise
220

    
221
  if status >= 0:
222
    exitcode = status
223
    signal_ = None
224
  else:
225
    exitcode = None
226
    signal_ = -status
227

    
228
  return RunResult(exitcode, signal_, out, err, strcmd, timeout_action, timeout)
229

    
230

    
231
def SetupDaemonEnv(cwd="/", umask=077):
232
  """Setup a daemon's environment.
233

234
  This should be called between the first and second fork, due to
235
  setsid usage.
236

237
  @param cwd: the directory to which to chdir
238
  @param umask: the umask to setup
239

240
  """
241
  os.chdir(cwd)
242
  os.umask(umask)
243
  os.setsid()
244

    
245

    
246
def SetupDaemonFDs(output_file, output_fd):
247
  """Setups up a daemon's file descriptors.
248

249
  @param output_file: if not None, the file to which to redirect
250
      stdout/stderr
251
  @param output_fd: if not None, the file descriptor for stdout/stderr
252

253
  """
254
  # check that at most one is defined
255
  assert [output_file, output_fd].count(None) >= 1
256

    
257
  # Open /dev/null (read-only, only for stdin)
258
  devnull_fd = os.open(os.devnull, os.O_RDONLY)
259

    
260
  output_close = True
261

    
262
  if output_fd is not None:
263
    output_close = False
264
  elif output_file is not None:
265
    # Open output file
266
    try:
267
      output_fd = os.open(output_file,
268
                          os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600)
269
    except EnvironmentError, err:
270
      raise Exception("Opening output file failed: %s" % err)
271
  else:
272
    output_fd = os.open(os.devnull, os.O_WRONLY)
273

    
274
  # Redirect standard I/O
275
  os.dup2(devnull_fd, 0)
276
  os.dup2(output_fd, 1)
277
  os.dup2(output_fd, 2)
278

    
279
  if devnull_fd > 2:
280
    utils_wrapper.CloseFdNoError(devnull_fd)
281

    
282
  if output_close and output_fd > 2:
283
    utils_wrapper.CloseFdNoError(output_fd)
284

    
285

    
286
def StartDaemon(cmd, env=None, cwd="/", output=None, output_fd=None,
287
                pidfile=None):
288
  """Start a daemon process after forking twice.
289

290
  @type cmd: string or list
291
  @param cmd: Command to run
292
  @type env: dict
293
  @param env: Additional environment variables
294
  @type cwd: string
295
  @param cwd: Working directory for the program
296
  @type output: string
297
  @param output: Path to file in which to save the output
298
  @type output_fd: int
299
  @param output_fd: File descriptor for output
300
  @type pidfile: string
301
  @param pidfile: Process ID file
302
  @rtype: int
303
  @return: Daemon process ID
304
  @raise errors.ProgrammerError: if we call this when forks are disabled
305

306
  """
307
  if _no_fork:
308
    raise errors.ProgrammerError("utils.StartDaemon() called with fork()"
309
                                 " disabled")
310

    
311
  if output and not (bool(output) ^ (output_fd is not None)):
312
    raise errors.ProgrammerError("Only one of 'output' and 'output_fd' can be"
313
                                 " specified")
314

    
315
  if isinstance(cmd, basestring):
316
    cmd = ["/bin/sh", "-c", cmd]
317

    
318
  strcmd = utils_text.ShellQuoteArgs(cmd)
319

    
320
  if output:
321
    logging.debug("StartDaemon %s, output file '%s'", strcmd, output)
322
  else:
323
    logging.debug("StartDaemon %s", strcmd)
324

    
325
  cmd_env = _BuildCmdEnvironment(env, False)
326

    
327
  # Create pipe for sending PID back
328
  (pidpipe_read, pidpipe_write) = os.pipe()
329
  try:
330
    try:
331
      # Create pipe for sending error messages
332
      (errpipe_read, errpipe_write) = os.pipe()
333
      try:
334
        try:
335
          # First fork
336
          pid = os.fork()
337
          if pid == 0:
338
            try:
339
              # Child process, won't return
340
              _StartDaemonChild(errpipe_read, errpipe_write,
341
                                pidpipe_read, pidpipe_write,
342
                                cmd, cmd_env, cwd,
343
                                output, output_fd, pidfile)
344
            finally:
345
              # Well, maybe child process failed
346
              os._exit(1) # pylint: disable-msg=W0212
347
        finally:
348
          utils_wrapper.CloseFdNoError(errpipe_write)
349

    
350
        # Wait for daemon to be started (or an error message to
351
        # arrive) and read up to 100 KB as an error message
352
        errormsg = utils_wrapper.RetryOnSignal(os.read, errpipe_read,
353
                                               100 * 1024)
354
      finally:
355
        utils_wrapper.CloseFdNoError(errpipe_read)
356
    finally:
357
      utils_wrapper.CloseFdNoError(pidpipe_write)
358

    
359
    # Read up to 128 bytes for PID
360
    pidtext = utils_wrapper.RetryOnSignal(os.read, pidpipe_read, 128)
361
  finally:
362
    utils_wrapper.CloseFdNoError(pidpipe_read)
363

    
364
  # Try to avoid zombies by waiting for child process
365
  try:
366
    os.waitpid(pid, 0)
367
  except OSError:
368
    pass
369

    
370
  if errormsg:
371
    raise errors.OpExecError("Error when starting daemon process: %r" %
372
                             errormsg)
373

    
374
  try:
375
    return int(pidtext)
376
  except (ValueError, TypeError), err:
377
    raise errors.OpExecError("Error while trying to parse PID %r: %s" %
378
                             (pidtext, err))
379

    
380

    
381
def _StartDaemonChild(errpipe_read, errpipe_write,
                      pidpipe_read, pidpipe_write,
                      args, env, cwd,
                      output, fd_output, pidfile):
  """Child side of L{StartDaemon}; never returns.

  Sets up the daemon environment, forks a second time, reports the PID of
  the resulting process on the PID pipe and replaces the process with the
  requested program. Any exception raised on the way is written to the
  error pipe before exiting with status 1.

  @param errpipe_read: read end of the error pipe (closed here)
  @param errpipe_write: write end of the error pipe
  @param pidpipe_read: read end of the PID pipe (closed here)
  @param pidpipe_write: write end of the PID pipe
  @param args: argument list of the program, C{args[0]} being the program
  @param env: environment for the program, or None
  @param cwd: working directory for the program
  @param output: path of the output file, or None
  @param fd_output: file descriptor for output, or None
  @param pidfile: path of the PID file, or None

  """
  try:
    # The read ends belong to the parent
    for parent_fd in (errpipe_read, pidpipe_read):
      utils_wrapper.CloseFdNoError(parent_fd)

    # First child process
    SetupDaemonEnv()

    # And fork for the second time
    if os.fork() != 0:
      # Exit first child process
      os._exit(0) # pylint: disable-msg=W0212

    # Make sure pipe is closed on execv* (and thereby notifies
    # original process)
    utils_wrapper.SetCloseOnExecFlag(errpipe_write, True)

    # List of file descriptors to be left open
    keep_fds = [errpipe_write]

    # Open PID file
    if pidfile:
      pidfile_fd = utils_io.WritePidFile(pidfile)

      # Keeping the file open to hold the lock
      keep_fds.append(pidfile_fd)
      utils_wrapper.SetCloseOnExecFlag(pidfile_fd, False)

    SetupDaemonFDs(output, fd_output)

    # Send daemon PID to parent
    utils_wrapper.RetryOnSignal(os.write, pidpipe_write, str(os.getpid()))

    # Close all file descriptors except stdio and error message pipe
    CloseFDs(noclose_fds=keep_fds)

    # Change working directory
    os.chdir(cwd)

    if env is not None:
      os.execvpe(args[0], args, env)
    else:
      os.execvp(args[0], args)
  except: # pylint: disable-msg=W0702
    try:
      # Report errors to original process
      WriteErrorToFD(errpipe_write, str(sys.exc_info()[1]))
    except: # pylint: disable-msg=W0702
      # Ignore errors in error handling
      pass

  os._exit(1) # pylint: disable-msg=W0212
444

    
445

    
446
def WriteErrorToFD(fd, err):
  """Writes an error message to a file descriptor, if there is one.

  @type fd: None or int (file descriptor)
  @param fd: if not None, the error will be written to this fd
  @param err: string, the error message; an empty message is replaced by
      a placeholder text

  """
  if fd is not None:
    utils_wrapper.RetryOnSignal(os.write, fd, err or "<unknown error>")
461

    
462

    
463
def _CheckIfAlive(child):
464
  """Raises L{utils_retry.RetryAgain} if child is still alive.
465

466
  @raises utils_retry.RetryAgain: If child is still alive
467

468
  """
469
  if child.poll() is None:
470
    raise utils_retry.RetryAgain()
471

    
472

    
473
def _WaitForProcess(child, timeout):
  """Waits for the child to terminate or until we reach timeout.

  Running into the timeout is not an error; the caller has to check the
  state of the child afterwards.

  @param child: the child process, as accepted by L{_CheckIfAlive}
  @param timeout: time to wait; negative values are treated as zero

  """
  delay = (1.0, 1.2, 5.0)
  wait_time = max(0, timeout)

  try:
    utils_retry.Retry(_CheckIfAlive, delay, wait_time, args=[child])
  except utils_retry.RetryTimeout:
    pass
482

    
483

    
484
def _RunCmdPipe(cmd, env, via_shell, cwd, interactive, timeout, noclose_fds,
                _linger_timeout=constants.CHILD_LINGER_TIMEOUT,
                _postfork_fn=None):
  """Run a command and return its output.

  @type  cmd: string or list
  @param cmd: Command to run
  @type env: dict
  @param env: The environment to use
  @type via_shell: bool
  @param via_shell: if we should run via the shell
  @type cwd: string
  @param cwd: the working directory for the program
  @type interactive: boolean
  @param interactive: Run command interactive (without piping)
  @type timeout: int
  @param timeout: Timeout after which the program gets terminated, or None
                  for no timeout
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @param _linger_timeout: Time the program is given to exit after SIGTERM
                          was sent, before it is sent SIGKILL
  @param _postfork_fn: Function run after fork but before timeout (unittest)
  @rtype: tuple
  @return: (out, err, status, timeout_action); C{out} and C{err} are the
           captured output (empty in interactive mode), C{status} is the
           result of waiting for the child and C{timeout_action} is one of
           the module's C{_TIMEOUT_*} values

  """
  poller = select.poll()

  stderr = subprocess.PIPE
  stdout = subprocess.PIPE
  stdin = subprocess.PIPE

  if interactive:
    stderr = stdout = stdin = None

  if noclose_fds:
    # Close everything but the requested descriptors in the child ourselves
    preexec_fn = lambda: CloseFDs(noclose_fds)
    close_fds = False
  else:
    preexec_fn = None
    close_fds = True

  child = subprocess.Popen(cmd, shell=via_shell,
                           stderr=stderr,
                           stdout=stdout,
                           stdin=stdin,
                           close_fds=close_fds, env=env,
                           cwd=cwd,
                           preexec_fn=preexec_fn)

  if _postfork_fn:
    _postfork_fn(child.pid)

  out = StringIO()
  err = StringIO()

  # Becomes a callable returning the remaining linger time once SIGTERM
  # handling has started
  linger_timeout = None

  if timeout is None:
    poll_timeout = None
  else:
    poll_timeout = utils_algo.RunningTimeout(timeout, True).Remaining

  msg_timeout = ("Command %s (%d) run into execution timeout, terminating" %
                 (cmd, child.pid))
  msg_linger = ("Command %s (%d) run into linger timeout, killing" %
                (cmd, child.pid))

  timeout_action = _TIMEOUT_NONE

  if not interactive:
    child.stdin.close()
    poller.register(child.stdout, select.POLLIN)
    poller.register(child.stderr, select.POLLIN)
    # Maps file descriptor to (buffer collecting the data, file to read)
    fdmap = {
      child.stdout.fileno(): (out, child.stdout),
      child.stderr.fileno(): (err, child.stderr),
      }
    for fd in fdmap:
      utils_wrapper.SetNonblockFlag(fd, True)

    while fdmap:
      if poll_timeout:
        # poll(2) wants milliseconds
        pt = poll_timeout() * 1000
        if pt < 0:
          if linger_timeout is None:
            logging.warning(msg_timeout)
            if child.poll() is None:
              timeout_action = _TIMEOUT_TERM
              utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid,
                                                  signal.SIGTERM)
            linger_timeout = \
              utils_algo.RunningTimeout(_linger_timeout, True).Remaining
          pt = linger_timeout() * 1000
          if pt < 0:
            # Linger time is over as well, stop collecting output
            break
      else:
        pt = None

      pollresult = utils_wrapper.RetryOnSignal(poller.poll, pt)

      for fd, event in pollresult:
        if event & select.POLLIN or event & select.POLLPRI:
          data = fdmap[fd][1].read()
          # no data from read signifies EOF (the same as POLLHUP)
          if not data:
            poller.unregister(fd)
            del fdmap[fd]
            continue
          fdmap[fd][0].write(data)
        if (event & select.POLLNVAL or event & select.POLLHUP or
            event & select.POLLERR):
          poller.unregister(fd)
          del fdmap[fd]

  if timeout is not None:
    assert callable(poll_timeout)

    # We have no I/O left but it might still run
    if child.poll() is None:
      _WaitForProcess(child, poll_timeout())

    # Terminate if still alive after timeout
    if child.poll() is None:
      if linger_timeout is None:
        logging.warning(msg_timeout)
        timeout_action = _TIMEOUT_TERM
        utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGTERM)
        lt = _linger_timeout
      else:
        # SIGTERM was already sent in the loop above, only wait for what
        # is left of the linger time
        lt = linger_timeout()
      _WaitForProcess(child, lt)

    # Okay, still alive after timeout and linger timeout? Kill it!
    if child.poll() is None:
      timeout_action = _TIMEOUT_KILL
      logging.warning(msg_linger)
      utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGKILL)

  out = out.getvalue()
  err = err.getvalue()

  status = child.wait()
  return out, err, status, timeout_action
627

    
628

    
629
def _RunCmdFile(cmd, env, via_shell, output, cwd, noclose_fds):
  """Run a command and save its output to a file.

  Standard output and standard error of the command are both appended to
  the file; its standard input is closed right after starting it.

  @type  cmd: string or list
  @param cmd: Command to run
  @type env: dict
  @param env: The environment to use
  @type via_shell: bool
  @param via_shell: if we should run via the shell
  @type output: str
  @param output: the filename in which to save the output
  @type cwd: string
  @param cwd: the working directory for the program
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @rtype: int
  @return: the exit status

  """
  fh = open(output, "a")

  def _CloseOtherFds():
    # The output file has to survive in addition to the requested ones
    CloseFDs(noclose_fds + [fh.fileno()])

  if noclose_fds:
    popen_kwargs = {
      "preexec_fn": _CloseOtherFds,
      "close_fds": False,
      }
  else:
    popen_kwargs = {
      "preexec_fn": None,
      "close_fds": True,
      }

  try:
    child = subprocess.Popen(cmd, shell=via_shell,
                             stderr=subprocess.STDOUT,
                             stdout=fh,
                             stdin=subprocess.PIPE,
                             env=env,
                             cwd=cwd,
                             **popen_kwargs)

    child.stdin.close()
    return child.wait()
  finally:
    fh.close()
672

    
673

    
674
def RunParts(dir_name, env=None, reset_env=False):
675
  """Run Scripts or programs in a directory
676

677
  @type dir_name: string
678
  @param dir_name: absolute path to a directory
679
  @type env: dict
680
  @param env: The environment to use
681
  @type reset_env: boolean
682
  @param reset_env: whether to reset or keep the default os environment
683
  @rtype: list of tuples
684
  @return: list of (name, (one of RUNDIR_STATUS), RunResult)
685

686
  """
687
  rr = []
688

    
689
  try:
690
    dir_contents = utils_io.ListVisibleFiles(dir_name)
691
  except OSError, err:
692
    logging.warning("RunParts: skipping %s (cannot list: %s)", dir_name, err)
693
    return rr
694

    
695
  for relname in sorted(dir_contents):
696
    fname = utils_io.PathJoin(dir_name, relname)
697
    if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
698
            constants.EXT_PLUGIN_MASK.match(relname) is not None):
699
      rr.append((relname, constants.RUNPARTS_SKIP, None))
700
    else:
701
      try:
702
        result = RunCmd([fname], env=env, reset_env=reset_env)
703
      except Exception, err: # pylint: disable-msg=W0703
704
        rr.append((relname, constants.RUNPARTS_ERR, str(err)))
705
      else:
706
        rr.append((relname, constants.RUNPARTS_RUN, result))
707

    
708
  return rr
709

    
710

    
711
def _GetProcStatusPath(pid):
712
  """Returns the path for a PID's proc status file.
713

714
  @type pid: int
715
  @param pid: Process ID
716
  @rtype: string
717

718
  """
719
  return "/proc/%d/status" % pid
720

    
721

    
722
def IsProcessAlive(pid):
723
  """Check if a given pid exists on the system.
724

725
  @note: zombie status is not handled, so zombie processes
726
      will be returned as alive
727
  @type pid: int
728
  @param pid: the process ID to check
729
  @rtype: boolean
730
  @return: True if the process exists
731

732
  """
733
  def _TryStat(name):
734
    try:
735
      os.stat(name)
736
      return True
737
    except EnvironmentError, err:
738
      if err.errno in (errno.ENOENT, errno.ENOTDIR):
739
        return False
740
      elif err.errno == errno.EINVAL:
741
        raise utils_retry.RetryAgain(err)
742
      raise
743

    
744
  assert isinstance(pid, int), "pid must be an integer"
745
  if pid <= 0:
746
    return False
747

    
748
  # /proc in a multiprocessor environment can have strange behaviors.
749
  # Retry the os.stat a few times until we get a good result.
750
  try:
751
    return utils_retry.Retry(_TryStat, (0.01, 1.5, 0.1), 0.5,
752
                             args=[_GetProcStatusPath(pid)])
753
  except utils_retry.RetryTimeout, err:
754
    err.RaiseInner()
755

    
756

    
757
def _ParseSigsetT(sigset):
758
  """Parse a rendered sigset_t value.
759

760
  This is the opposite of the Linux kernel's fs/proc/array.c:render_sigset_t
761
  function.
762

763
  @type sigset: string
764
  @param sigset: Rendered signal set from /proc/$pid/status
765
  @rtype: set
766
  @return: Set of all enabled signal numbers
767

768
  """
769
  result = set()
770

    
771
  signum = 0
772
  for ch in reversed(sigset):
773
    chv = int(ch, 16)
774

    
775
    # The following could be done in a loop, but it's easier to read and
776
    # understand in the unrolled form
777
    if chv & 1:
778
      result.add(signum + 1)
779
    if chv & 2:
780
      result.add(signum + 2)
781
    if chv & 4:
782
      result.add(signum + 3)
783
    if chv & 8:
784
      result.add(signum + 4)
785

    
786
    signum += 4
787

    
788
  return result
789

    
790

    
791
def _GetProcStatusField(pstatus, field):
792
  """Retrieves a field from the contents of a proc status file.
793

794
  @type pstatus: string
795
  @param pstatus: Contents of /proc/$pid/status
796
  @type field: string
797
  @param field: Name of field whose value should be returned
798
  @rtype: string
799

800
  """
801
  for line in pstatus.splitlines():
802
    parts = line.split(":", 1)
803

    
804
    if len(parts) < 2 or parts[0] != field:
805
      continue
806

    
807
    return parts[1].strip()
808

    
809
  return None
810

    
811

    
812
def IsProcessHandlingSignal(pid, signum, status_path=None):
813
  """Checks whether a process is handling a signal.
814

815
  @type pid: int
816
  @param pid: Process ID
817
  @type signum: int
818
  @param signum: Signal number
819
  @rtype: bool
820

821
  """
822
  if status_path is None:
823
    status_path = _GetProcStatusPath(pid)
824

    
825
  try:
826
    proc_status = utils_io.ReadFile(status_path)
827
  except EnvironmentError, err:
828
    # In at least one case, reading /proc/$pid/status failed with ESRCH.
829
    if err.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL, errno.ESRCH):
830
      return False
831
    raise
832

    
833
  sigcgt = _GetProcStatusField(proc_status, "SigCgt")
834
  if sigcgt is None:
835
    raise RuntimeError("%s is missing 'SigCgt' field" % status_path)
836

    
837
  # Now check whether signal is handled
838
  return signum in _ParseSigsetT(sigcgt)
839

    
840

    
841
def Daemonize(logfile):
  """Daemonize the current process.

  This detaches the current process from the controlling terminal and
  runs it in the background as a daemon. Only the process created by the
  second fork returns from this function; the original process waits on a
  pipe and exits with status 1 if it receives an error message, 0
  otherwise.

  @type logfile: str
  @param logfile: the logfile to which we should redirect stdout/stderr
  @rtype: tuple; (int, callable)
  @return: File descriptor of pipe(2) which must be closed to notify parent
    process and a callable to reopen log files

  """
  # pylint: disable-msg=W0212
  # yes, we really want os._exit

  # TODO: do another attempt to merge Daemonize and StartDaemon, or at
  # least abstract the pipe functionality between them

  # Create pipe for sending error messages
  (rpipe, wpipe) = os.pipe()

  # this might fail
  if os.fork() != 0:
    # Original process
    utils_wrapper.CloseFdNoError(wpipe)
    # Wait for daemon to be started (or an error message to
    # arrive) and read up to 100 KB as an error message
    errormsg = utils_wrapper.RetryOnSignal(os.read, rpipe, 100 * 1024)
    if not errormsg:
      os._exit(0) # Exit parent of the first child.

    sys.stderr.write("Error when starting daemon process: %r\n" % errormsg)
    os._exit(1) # Exit parent of the first child.

  # The first child.
  SetupDaemonEnv()

  # this might fail
  if os.fork() != 0:
    os._exit(0) # Exit parent (the first child) of the second child.

  # The second child.
  utils_wrapper.CloseFdNoError(rpipe)

  reopen_fn = compat.partial(SetupDaemonFDs, logfile, None)

  # Open logs for the first time
  reopen_fn()

  return (wpipe, reopen_fn)
893

    
894

    
895
def KillProcess(pid, signal_=signal.SIGTERM, timeout=30,
                waitpid=False):
  """Kill a process given by its pid.

  Nothing is done if the process isn't alive in the first place.

  @type pid: int
  @param pid: The PID to terminate.
  @type signal_: int
  @param signal_: The signal to send, by default SIGTERM
  @type timeout: int
  @param timeout: The timeout after which, if the process is still alive,
                  a SIGKILL will be sent. If not positive, no such checking
                  will be done
  @type waitpid: boolean
  @param waitpid: If true, we should waitpid on this process after
      sending signals, since it's our own child and otherwise it
      would remain as zombie
  @raise errors.ProgrammerError: if the PID is not positive

  """
  def _SendSignal(target, signum, wait):
    """Sends the signal and optionally collects the child's status."""
    kill_result = utils_wrapper.IgnoreProcessNotFound(os.kill, target, signum)
    if not (kill_result and wait):
      return

    try:
      os.waitpid(target, os.WNOHANG)
    except OSError:
      pass

  def _CheckProcess():
    """Returns once the process is gone, asks for a retry otherwise."""
    if not IsProcessAlive(pid):
      return

    try:
      (result_pid, _) = os.waitpid(pid, os.WNOHANG)
    except OSError:
      raise utils_retry.RetryAgain()

    if result_pid <= 0:
      raise utils_retry.RetryAgain()

  if pid <= 0:
    # kill with pid=0 == suicide
    raise errors.ProgrammerError("Invalid pid given '%s'" % pid)

  if not IsProcessAlive(pid):
    return

  _SendSignal(pid, signal_, waitpid)

  if timeout <= 0:
    return

  try:
    # Wait up to $timeout seconds
    utils_retry.Retry(_CheckProcess, (0.01, 1.5, 0.1), timeout)
  except utils_retry.RetryTimeout:
    pass

  if IsProcessAlive(pid):
    # Kill process if it's still alive
    _SendSignal(pid, signal.SIGKILL, waitpid)
956

    
957

    
958
def RunInSeparateProcess(fn, *args):
  """Runs a function in a separate process.

  The function is called in a forked child and its result is transported
  back as the child's exit code, 0 for false and 1 for true.

  Note: Only boolean return values are supported.

  @type fn: callable
  @param fn: Function to be called
  @param args: positional arguments passed to the function
  @rtype: bool
  @return: Function's result
  @raise errors.GenericError: if the child was terminated by a signal or
      exited with a code other than 0 or 1

  """
  pid = os.fork()
  if pid == 0:
    # Child process
    try:
      # In case the function uses temporary files
      utils_wrapper.ResetTempfileModule()

      # Call function
      result = int(bool(fn(*args)))
      assert result in (0, 1)
    except: # pylint: disable-msg=W0702
      logging.exception("Error while calling function in separate process")
      # 0 and 1 are reserved for the return value
      result = 33

    os._exit(result) # pylint: disable-msg=W0212

  # Parent process

  # Avoid zombies and check exit code
  (_, status) = os.waitpid(pid, 0)

  exitcode = None
  signum = None
  if os.WIFSIGNALED(status):
    signum = os.WTERMSIG(status)
  else:
    exitcode = os.WEXITSTATUS(status)

  if signum is not None or exitcode not in (0, 1):
    raise errors.GenericError("Child program failed (code=%s, signal=%s)" %
                              (exitcode, signum))

  return bool(exitcode)
1003

    
1004

    
1005
def CloseFDs(noclose_fds=None):
  """Close file descriptors.

  This closes all file descriptors above 2 (i.e. except
  stdin/out/err).

  The upper bound is the hard limit of C{RLIMIT_NOFILE}; if that limit is
  infinite, C{SC_OPEN_MAX} is used, falling back to 1024 if it isn't
  available either.

  @type noclose_fds: list or None
  @param noclose_fds: if given, it denotes a list of file descriptor
      that should not be closed

  """
  # Default maximum for the number of available file descriptors.
  if 'SC_OPEN_MAX' in os.sysconf_names:
    try:
      MAXFD = os.sysconf('SC_OPEN_MAX')
      if MAXFD < 0:
        MAXFD = 1024
    except OSError:
      MAXFD = 1024
  else:
    MAXFD = 1024

  maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
  if (maxfd == resource.RLIM_INFINITY):
    maxfd = MAXFD

  # The loop below can run over a very large range; build the lookup
  # structure once instead of scanning the list for every descriptor
  keep_fds = frozenset(noclose_fds or ())

  # Iterate through and close all file descriptors (except the standard ones)
  for fd in range(3, maxfd):
    if fd in keep_fds:
      continue
    utils_wrapper.CloseFdNoError(fd)