Statistics
| Branch: | Tag: | Revision:

root / lib / utils / process.py @ 81f7ea25

History | View | Annotate | Download (28.6 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Utility functions for processes.
22

23
"""
24

    
25

    
26
import os
27
import sys
28
import subprocess
29
import errno
30
import select
31
import logging
32
import signal
33
import resource
34

    
35
from cStringIO import StringIO
36

    
37
from ganeti import errors
38
from ganeti import constants
39
from ganeti import compat
40

    
41
from ganeti.utils import retry as utils_retry
42
from ganeti.utils import wrapper as utils_wrapper
43
from ganeti.utils import text as utils_text
44
from ganeti.utils import io as utils_io
45
from ganeti.utils import algo as utils_algo
46

    
47

    
48
#: when set to True, L{RunCmd} and L{StartDaemon} refuse to run
_no_fork = False

#: Timeout actions reported by L{_RunCmdPipe}: no action was taken, the
#: child was sent SIGTERM, the child was sent SIGKILL
_TIMEOUT_NONE = 0
_TIMEOUT_TERM = 1
_TIMEOUT_KILL = 2
54

    
55

    
56
def DisableFork():
  """Disables the use of fork(2).

  Sets the module-level C{_no_fork} flag. L{RunCmd} and L{StartDaemon}
  check this flag and raise L{errors.ProgrammerError} once it is set.

  """
  global _no_fork # pylint: disable=W0603

  _no_fork = True
63

    
64

    
65
class RunResult(object):
  """Holds the result of running external programs.

  @type exit_code: int or None
  @ivar exit_code: the exit code of the program, or None (if the program
      didn't exit())
  @type signal: int or None
  @ivar signal: the signal that caused the program to finish, or None
      (if the program wasn't terminated by a signal)
  @type stdout: str
  @ivar stdout: the standard output of the program
  @type stderr: str
  @ivar stderr: the standard error of the program
  @type failed: boolean
  @ivar failed: True in case the program was
      terminated by a signal or exited with a non-zero exit code
  @type fail_reason: str or None
  @ivar fail_reason: a string detailing the termination reason, or None
      if the program did not fail
  @ivar cmd: the command that was run, as given to the constructor
  @ivar output: read-only property; C{stdout} followed by C{stderr}

  """
  __slots__ = ["exit_code", "signal", "stdout", "stderr",
               "failed", "fail_reason", "cmd"]

  def __init__(self, exit_code, signal_, stdout, stderr, cmd, timeout_action,
               timeout):
    """Stores the outcome of a command and derives the failure details.

    @param exit_code: exit code, or None if the program didn't exit()
    @param signal_: number of the terminating signal, or None
    @param stdout: captured standard output
    @param stderr: captured standard error
    @param cmd: the command that was run
    @param timeout_action: one of the module's C{_TIMEOUT_*} values
    @param timeout: timeout in seconds; only used to build the failure
        message when a timeout action was taken

    """
    self.cmd = cmd
    self.exit_code = exit_code
    self.signal = signal_
    self.stdout = stdout
    self.stderr = stderr
    self.failed = (signal_ is not None or exit_code != 0)

    if not self.failed:
      # "fail_reason" is a slot, so it must be assigned on every path;
      # leaving it unset makes reading it raise AttributeError
      self.fail_reason = None
      return

    if self.signal is not None:
      fail_msgs = ["terminated by signal %s" % self.signal]
    elif self.exit_code is not None:
      fail_msgs = ["exited with exit code %s" % self.exit_code]
    else:
      fail_msgs = ["unable to determine termination reason"]

    if timeout_action == _TIMEOUT_TERM:
      fail_msgs.append("terminated after timeout of %.2f seconds" % timeout)
    elif timeout_action == _TIMEOUT_KILL:
      fail_msgs.append(("force termination after timeout of %.2f seconds"
                        " and linger for another %.2f seconds") %
                       (timeout, constants.CHILD_LINGER_TIMEOUT))

    self.fail_reason = utils_text.CommaJoin(fail_msgs)

    logging.debug("Command '%s' failed (%s); output: %s",
                  self.cmd, self.fail_reason, self.output)

  def _GetOutput(self):
    """Returns the combined stdout and stderr for easier usage.

    """
    return self.stdout + self.stderr

  output = property(_GetOutput, None, None, "Return full output")
125

    
126

    
127
def _BuildCmdEnvironment(env, reset):
  """Builds the environment for an external program.

  @type env: dict or None
  @param env: variables to add on top of the base environment
  @type reset: boolean
  @param reset: if true, start from an empty environment; otherwise start
      from a copy of C{os.environ} with C{LC_ALL} forced to "C"
  @rtype: dict
  @return: the environment to pass to the child process

  """
  result = {}

  if not reset:
    result.update(os.environ)
    result["LC_ALL"] = "C"

  # Caller-supplied values are applied last, so they may override LC_ALL
  if env is not None:
    result.update(env)

  return result
141

    
142

    
143
def RunCmd(cmd, env=None, output=None, cwd="/", reset_env=False,
           interactive=False, timeout=None, noclose_fds=None,
           _postfork_fn=None):
  """Execute a (shell) command.

  The command should not read from its standard input, as it will be
  closed.

  @type cmd: string or list
  @param cmd: Command to run; a string is run through the shell, a list
      is executed directly after converting every item to a string
  @type env: dict
  @param env: Additional environment variables
  @type output: str
  @param output: if desired, the output of the command can be
      saved in a file instead of the RunResult instance; this
      parameter denotes the file name (if not None)
  @type cwd: string
  @param cwd: if specified, will be used as the working
      directory for the command; the default will be /
  @type reset_env: boolean
  @param reset_env: whether to reset or keep the default os environment
  @type interactive: boolean
  @param interactive: whether we pipe stdin, stdout and stderr
                      (default behaviour) or run the command interactive
  @type timeout: int
  @param timeout: If not None, timeout in seconds until child process gets
                  killed; not applied when C{output} is given
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @param _postfork_fn: Callback run after fork but before timeout (unittest)
  @rtype: L{RunResult}
  @return: RunResult instance
  @raise errors.ProgrammerError: if we call this when forks are disabled,
      or if both C{output} and C{interactive} are given
  @raise errors.OpExecError: if the program could not be found

  """
  if _no_fork:
    raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled")

  if output and interactive:
    raise errors.ProgrammerError("Parameters 'output' and 'interactive' can"
                                 " not be provided at the same time")

  # Strings go through the shell, sequences are executed directly
  shell = isinstance(cmd, basestring)
  if shell:
    strcmd = cmd
  else:
    cmd = [str(val) for val in cmd]
    strcmd = utils_text.ShellQuoteArgs(cmd)

  if output:
    logging.debug("RunCmd %s, output file '%s'", strcmd, output)
  else:
    logging.debug("RunCmd %s", strcmd)

  cmd_env = _BuildCmdEnvironment(env, reset_env)

  try:
    if output is not None:
      assert _postfork_fn is None, \
          "_postfork_fn not supported if output provided"
      timeout_action = _TIMEOUT_NONE
      status = _RunCmdFile(cmd, cmd_env, shell, output, cwd, noclose_fds)
      out = err = ""
    else:
      (out, err, status, timeout_action) = \
        _RunCmdPipe(cmd, cmd_env, shell, cwd, interactive, timeout,
                    noclose_fds, _postfork_fn=_postfork_fn)
  except OSError:
    exc = sys.exc_info()[1]
    if exc.errno != errno.ENOENT:
      raise
    raise errors.OpExecError("Can't execute '%s': not found (%s)" %
                             (strcmd, exc))

  # A negative status is the negated number of the terminating signal
  if status < 0:
    exitcode = None
    signal_ = -status
  else:
    exitcode = status
    signal_ = None

  return RunResult(exitcode, signal_, out, err, strcmd, timeout_action, timeout)
228

    
229

    
230
def SetupDaemonEnv(cwd="/", umask=077):
231
  """Setup a daemon's environment.
232

233
  This should be called between the first and second fork, due to
234
  setsid usage.
235

236
  @param cwd: the directory to which to chdir
237
  @param umask: the umask to setup
238

239
  """
240
  os.chdir(cwd)
241
  os.umask(umask)
242
  os.setsid()
243

    
244

    
245
def SetupDaemonFDs(output_file, output_fd):
246
  """Setups up a daemon's file descriptors.
247

248
  @param output_file: if not None, the file to which to redirect
249
      stdout/stderr
250
  @param output_fd: if not None, the file descriptor for stdout/stderr
251

252
  """
253
  # check that at most one is defined
254
  assert [output_file, output_fd].count(None) >= 1
255

    
256
  # Open /dev/null (read-only, only for stdin)
257
  devnull_fd = os.open(os.devnull, os.O_RDONLY)
258

    
259
  output_close = True
260

    
261
  if output_fd is not None:
262
    output_close = False
263
  elif output_file is not None:
264
    # Open output file
265
    try:
266
      output_fd = os.open(output_file,
267
                          os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600)
268
    except EnvironmentError, err:
269
      raise Exception("Opening output file failed: %s" % err)
270
  else:
271
    output_fd = os.open(os.devnull, os.O_WRONLY)
272

    
273
  # Redirect standard I/O
274
  os.dup2(devnull_fd, 0)
275
  os.dup2(output_fd, 1)
276
  os.dup2(output_fd, 2)
277

    
278
  if devnull_fd > 2:
279
    utils_wrapper.CloseFdNoError(devnull_fd)
280

    
281
  if output_close and output_fd > 2:
282
    utils_wrapper.CloseFdNoError(output_fd)
283

    
284

    
285
def StartDaemon(cmd, env=None, cwd="/", output=None, output_fd=None,
                pidfile=None):
  """Start a daemon process after forking twice.

  The calling process forks a child which runs L{_StartDaemonChild}, then
  reads an error message and the daemon's PID from two pipes and finally
  waits for the child.

  @type cmd: string or list
  @param cmd: Command to run; a string is run via C{/bin/sh -c}
  @type env: dict
  @param env: Additional environment variables
  @type cwd: string
  @param cwd: Working directory for the program
  @type output: string
  @param output: Path to file in which to save the output
  @type output_fd: int
  @param output_fd: File descriptor for output
  @type pidfile: string
  @param pidfile: Process ID file
  @rtype: int
  @return: Daemon process ID
  @raise errors.ProgrammerError: if we call this when forks are disabled,
      or if both C{output} and C{output_fd} are given
  @raise errors.OpExecError: if the child reported an error or the PID
      received from it cannot be parsed

  """
  if _no_fork:
    raise errors.ProgrammerError("utils.StartDaemon() called with fork()"
                                 " disabled")

  if output and output_fd is not None:
    raise errors.ProgrammerError("Only one of 'output' and 'output_fd' can be"
                                 " specified")

  if isinstance(cmd, basestring):
    cmd = ["/bin/sh", "-c", cmd]

  strcmd = utils_text.ShellQuoteArgs(cmd)

  if output:
    logging.debug("StartDaemon %s, output file '%s'", strcmd, output)
  else:
    logging.debug("StartDaemon %s", strcmd)

  cmd_env = _BuildCmdEnvironment(env, False)

  # Create pipe for sending PID back
  (pidpipe_read, pidpipe_write) = os.pipe()
  try:
    try:
      # Create pipe for sending error messages
      (errpipe_read, errpipe_write) = os.pipe()
      try:
        try:
          # First fork
          child_pid = os.fork()
          if child_pid == 0:
            try:
              # Child process, won't return
              _StartDaemonChild(errpipe_read, errpipe_write,
                                pidpipe_read, pidpipe_write,
                                cmd, cmd_env, cwd,
                                output, output_fd, pidfile)
            finally:
              # Well, maybe child process failed
              os._exit(1) # pylint: disable=W0212
        finally:
          # Our copy of the write end must be closed before reading
          utils_wrapper.CloseFdNoError(errpipe_write)

        # Wait for daemon to be started (or an error message to
        # arrive) and read up to 100 KB as an error message
        errmsg = utils_wrapper.RetryOnSignal(os.read, errpipe_read,
                                             100 * 1024)
      finally:
        utils_wrapper.CloseFdNoError(errpipe_read)
    finally:
      utils_wrapper.CloseFdNoError(pidpipe_write)

    # Read up to 128 bytes for PID
    raw_pid = utils_wrapper.RetryOnSignal(os.read, pidpipe_read, 128)
  finally:
    utils_wrapper.CloseFdNoError(pidpipe_read)

  # Try to avoid zombies by waiting for child process
  try:
    os.waitpid(child_pid, 0)
  except OSError:
    pass

  if errmsg:
    raise errors.OpExecError("Error when starting daemon process: %r" %
                             errmsg)

  try:
    return int(raw_pid)
  except (ValueError, TypeError):
    raise errors.OpExecError("Error while trying to parse PID %r: %s" %
                             (raw_pid, sys.exc_info()[1]))
378

    
379

    
380
def _StartDaemonChild(errpipe_read, errpipe_write,
                      pidpipe_read, pidpipe_write,
                      args, env, cwd,
                      output, fd_output, pidfile):
  """Child process for starting daemon.

  Runs in the first child of L{StartDaemon}: sets up the daemon
  environment, forks a second time, reports the second child's PID through
  the PID pipe and replaces that process with the requested program. Any
  error is written to the error pipe. This function never returns; the
  process always ends with exec or C{os._exit}.

  @param errpipe_read: read end of the error pipe (closed here)
  @param errpipe_write: write end of the error pipe
  @param pidpipe_read: read end of the PID pipe (closed here)
  @param pidpipe_write: write end of the PID pipe
  @param args: program and arguments to execute
  @param env: environment for the program, or None to keep the current one
  @param cwd: working directory for the program
  @param output: passed to L{SetupDaemonFDs} as output file
  @param fd_output: passed to L{SetupDaemonFDs} as output descriptor
  @param pidfile: if true, path of the PID file to write

  """
  try:
    # Close parent's side
    for parent_fd in (errpipe_read, pidpipe_read):
      utils_wrapper.CloseFdNoError(parent_fd)

    # First child process
    SetupDaemonEnv()

    # And fork for the second time
    if os.fork() != 0:
      # Exit first child process
      os._exit(0) # pylint: disable=W0212

    # Make sure pipe is closed on execv* (and thereby notifies
    # original process)
    utils_wrapper.SetCloseOnExecFlag(errpipe_write, True)

    # List of file descriptors to be left open
    keep_open = [errpipe_write]

    # Open PID file
    if pidfile:
      pidfile_fd = utils_io.WritePidFile(pidfile)

      # Keeping the file open to hold the lock
      keep_open.append(pidfile_fd)

      utils_wrapper.SetCloseOnExecFlag(pidfile_fd, False)

    SetupDaemonFDs(output, fd_output)

    # Send daemon PID to parent
    utils_wrapper.RetryOnSignal(os.write, pidpipe_write, str(os.getpid()))

    # Close all file descriptors except stdio and error message pipe
    CloseFDs(noclose_fds=keep_open)

    # Change working directory
    os.chdir(cwd)

    if env is None:
      os.execvp(args[0], args)
    else:
      os.execvpe(args[0], args, env)
  except: # pylint: disable=W0702
    try:
      # Report errors to original process
      WriteErrorToFD(errpipe_write, str(sys.exc_info()[1]))
    except: # pylint: disable=W0702
      # Ignore errors in error handling
      pass

  os._exit(1) # pylint: disable=W0212
443

    
444

    
445
def WriteErrorToFD(fd, err):
  """Possibly write an error message to a fd.

  @type fd: None or int (file descriptor)
  @param fd: if not None, the error will be written to this fd
  @param err: string, the error message; an empty message is replaced
      by "<unknown error>"

  """
  if fd is not None:
    utils_wrapper.RetryOnSignal(os.write, fd, err or "<unknown error>")
460

    
461

    
462
def _CheckIfAlive(child):
  """Raises L{utils_retry.RetryAgain} if child is still alive.

  @param child: object with a C{poll()} method returning None while the
      process is running
  @raises utils_retry.RetryAgain: If child is still alive

  """
  still_running = child.poll() is None

  if still_running:
    raise utils_retry.RetryAgain()
470

    
471

    
472
def _WaitForProcess(child, timeout):
  """Waits for the child to terminate or until we reach timeout.

  @param child: child process, polled through L{_CheckIfAlive}
  @param timeout: maximum time to wait; negative values are treated as 0

  """
  retry_delay = (1.0, 1.2, 5.0)
  deadline = max(0, timeout)

  try:
    utils_retry.Retry(_CheckIfAlive, retry_delay, deadline, args=[child])
  except utils_retry.RetryTimeout:
    # Running out of time is not an error; the caller re-checks the child
    pass
481

    
482

    
483
def _RunCmdPipe(cmd, env, via_shell, cwd, interactive, timeout, noclose_fds,
                _linger_timeout=constants.CHILD_LINGER_TIMEOUT,
                _postfork_fn=None):
  """Run a command and return its output.

  Unless running interactively, the child's standard input is closed and
  its standard output and error are collected through pipes. If a timeout
  is given and expires, the child is sent SIGTERM; if it is still alive
  after the linger timeout it is sent SIGKILL.

  @type  cmd: string or list
  @param cmd: Command to run
  @type env: dict
  @param env: The environment to use
  @type via_shell: bool
  @param via_shell: if we should run via the shell
  @type cwd: string
  @param cwd: the working directory for the program
  @type interactive: boolean
  @param interactive: Run command interactive (without piping)
  @type timeout: int
  @param timeout: Timeout after which the program gets terminated, or None
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @param _linger_timeout: time to wait between SIGTERM and SIGKILL
  @param _postfork_fn: Function run after fork but before timeout (unittest)
  @rtype: tuple
  @return: (out, err, status, timeout_action)

  """
  poller = select.poll()

  if interactive:
    stdin = stdout = stderr = None
  else:
    stdin = stdout = stderr = subprocess.PIPE

  if noclose_fds:
    preexec_fn = lambda: CloseFDs(noclose_fds)
    close_fds = False
  else:
    preexec_fn = None
    close_fds = True

  child = subprocess.Popen(cmd, shell=via_shell,
                           stderr=stderr,
                           stdout=stdout,
                           stdin=stdin,
                           close_fds=close_fds, env=env,
                           cwd=cwd,
                           preexec_fn=preexec_fn)

  if _postfork_fn:
    _postfork_fn(child.pid)

  out = StringIO()
  err = StringIO()

  # Becomes a callable once the execution timeout has expired
  linger_timeout = None

  if timeout is None:
    poll_timeout = None
  else:
    poll_timeout = utils_algo.RunningTimeout(timeout, True).Remaining

  msg_timeout = ("Command %s (%d) run into execution timeout, terminating" %
                 (cmd, child.pid))
  msg_linger = ("Command %s (%d) run into linger timeout, killing" %
                (cmd, child.pid))

  timeout_action = _TIMEOUT_NONE

  read_events = select.POLLIN | select.POLLPRI
  gone_events = select.POLLNVAL | select.POLLHUP | select.POLLERR

  if not interactive:
    child.stdin.close()
    poller.register(child.stdout, select.POLLIN)
    poller.register(child.stderr, select.POLLIN)
    # Maps a descriptor to (buffer collecting its data, pipe to read from)
    fdmap = {
      child.stdout.fileno(): (out, child.stdout),
      child.stderr.fileno(): (err, child.stderr),
      }
    for fd in fdmap:
      utils_wrapper.SetNonblockFlag(fd, True)

    while fdmap:
      if poll_timeout:
        pt = poll_timeout() * 1000
        if pt < 0:
          if linger_timeout is None:
            logging.warning(msg_timeout)
            if child.poll() is None:
              timeout_action = _TIMEOUT_TERM
              utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid,
                                                  signal.SIGTERM)
            linger_timeout = \
              utils_algo.RunningTimeout(_linger_timeout, True).Remaining
          pt = linger_timeout() * 1000
          if pt < 0:
            break
      else:
        pt = None

      pollresult = utils_wrapper.RetryOnSignal(poller.poll, pt)

      for (fd, event) in pollresult:
        (sink, pipe) = fdmap[fd]
        finished = bool(event & gone_events)

        if event & read_events:
          data = pipe.read()
          if data:
            sink.write(data)
          else:
            # no data from read signifies EOF (the same as POLLHUP)
            finished = True

        if finished:
          poller.unregister(fd)
          del fdmap[fd]

  if timeout is not None:
    assert callable(poll_timeout)

    # We have no I/O left but it might still run
    if child.poll() is None:
      _WaitForProcess(child, poll_timeout())

    # Terminate if still alive after timeout
    if child.poll() is None:
      if linger_timeout is None:
        logging.warning(msg_timeout)
        timeout_action = _TIMEOUT_TERM
        utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGTERM)
        lt = _linger_timeout
      else:
        lt = linger_timeout()
      _WaitForProcess(child, lt)

    # Okay, still alive after timeout and linger timeout? Kill it!
    if child.poll() is None:
      timeout_action = _TIMEOUT_KILL
      logging.warning(msg_linger)
      utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGKILL)

  out = out.getvalue()
  err = err.getvalue()

  status = child.wait()
  return (out, err, status, timeout_action)
626

    
627

    
628
def _RunCmdFile(cmd, env, via_shell, output, cwd, noclose_fds):
  """Run a command and save its output to a file.

  Standard output and standard error are both appended to the file; the
  child's standard input is closed right after it has been started.

  @type  cmd: string or list
  @param cmd: Command to run
  @type env: dict
  @param env: The environment to use
  @type via_shell: bool
  @param via_shell: if we should run via the shell
  @type output: str
  @param output: the filename in which to save the output
  @type cwd: string
  @param cwd: the working directory for the program
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @rtype: int
  @return: the exit status

  """
  outfile = open(output, "a")

  if noclose_fds:
    # The output file must survive the descriptor cleanup in the child
    preexec_fn = lambda: CloseFDs(noclose_fds + [outfile.fileno()])
    close_fds = False
  else:
    preexec_fn = None
    close_fds = True

  try:
    child = subprocess.Popen(cmd, shell=via_shell,
                             stderr=subprocess.STDOUT,
                             stdout=outfile,
                             stdin=subprocess.PIPE,
                             close_fds=close_fds, env=env,
                             cwd=cwd,
                             preexec_fn=preexec_fn)

    child.stdin.close()
    return child.wait()
  finally:
    outfile.close()
671

    
672

    
673
def RunParts(dir_name, env=None, reset_env=False):
  """Run Scripts or programs in a directory

  Entries are processed in sorted order. An entry is run only if it is a
  regular file, is executable and its name matches
  C{constants.EXT_PLUGIN_MASK}; everything else is reported as skipped.

  @type dir_name: string
  @param dir_name: absolute path to a directory
  @type env: dict
  @param env: The environment to use
  @type reset_env: boolean
  @param reset_env: whether to reset or keep the default os environment
  @rtype: list of tuples
  @return: list of (name, (one of RUNDIR_STATUS), RunResult); the third
      item is None for skipped entries and the error message for entries
      which could not be run

  """
  results = []

  try:
    entries = utils_io.ListVisibleFiles(dir_name)
  except OSError:
    logging.warning("RunParts: skipping %s (cannot list: %s)", dir_name,
                    sys.exc_info()[1])
    return results

  for relname in sorted(entries):
    fname = utils_io.PathJoin(dir_name, relname)

    runnable = (os.path.isfile(fname) and os.access(fname, os.X_OK) and
                constants.EXT_PLUGIN_MASK.match(relname) is not None)
    if not runnable:
      results.append((relname, constants.RUNPARTS_SKIP, None))
      continue

    try:
      result = RunCmd([fname], env=env, reset_env=reset_env)
    except Exception: # pylint: disable=W0703
      results.append((relname, constants.RUNPARTS_ERR,
                      str(sys.exc_info()[1])))
    else:
      results.append((relname, constants.RUNPARTS_RUN, result))

  return results
708

    
709

    
710
def _GetProcStatusPath(pid):
  """Returns the path for a PID's proc status file.

  @type pid: int
  @param pid: Process ID
  @rtype: string
  @return: C{/proc/<pid>/status}

  """
  status_path = "/proc/%d/status" % pid
  return status_path
719

    
720

    
721
def IsProcessAlive(pid):
  """Check if a given pid exists on the system.

  The check is done by calling stat on the PID's proc status file.

  @note: zombie status is not handled, so zombie processes
      will be returned as alive
  @type pid: int
  @param pid: the process ID to check
  @rtype: boolean
  @return: True if the process exists; False if it doesn't or if the pid
      is not positive

  """
  def _TryStat(name):
    """Stats C{name}; asks for a retry if that fails with EINVAL.

    """
    try:
      os.stat(name)
    except EnvironmentError:
      err = sys.exc_info()[1]
      if err.errno in (errno.ENOENT, errno.ENOTDIR):
        return False
      if err.errno == errno.EINVAL:
        raise utils_retry.RetryAgain(err)
      raise
    return True

  assert isinstance(pid, int), "pid must be an integer"
  if pid <= 0:
    return False

  # /proc in a multiprocessor environment can have strange behaviors.
  # Retry the os.stat a few times until we get a good result.
  try:
    return utils_retry.Retry(_TryStat, (0.01, 1.5, 0.1), 0.5,
                             args=[_GetProcStatusPath(pid)])
  except utils_retry.RetryTimeout:
    sys.exc_info()[1].RaiseInner()
754

    
755

    
756
def _ParseSigsetT(sigset):
  """Parse a rendered sigset_t value.

  This is the opposite of the Linux kernel's fs/proc/array.c:render_sigset_t
  function.

  @type sigset: string
  @param sigset: Rendered signal set from /proc/$pid/status
  @rtype: set
  @return: Set of all enabled signal numbers

  """
  signals = set()

  # The last hex digit holds signals 1-4, the one before it 5-8, and so on
  for (pos, digit) in enumerate(reversed(sigset)):
    nibble = int(digit, 16)
    first_signal = pos * 4 + 1

    for bit in range(4):
      if nibble & (1 << bit):
        signals.add(first_signal + bit)

  return signals
788

    
789

    
790
def _GetProcStatusField(pstatus, field):
  """Retrieves a field from the contents of a proc status file.

  @type pstatus: string
  @param pstatus: Contents of /proc/$pid/status
  @type field: string
  @param field: Name of field whose value should be returned
  @rtype: string
  @return: the value of the first matching field with surrounding
      whitespace removed, or None if there is no such field

  """
  for line in pstatus.splitlines():
    # Lines without a separator can't hold a field
    if ":" not in line:
      continue

    (name, value) = line.split(":", 1)

    if name == field:
      return value.strip()

  return None
809

    
810

    
811
def IsProcessHandlingSignal(pid, signum, status_path=None):
  """Checks whether a process is handling a signal.

  The decision is based on the C{SigCgt} field of the proc status file.

  @type pid: int
  @param pid: Process ID
  @type signum: int
  @param signum: Signal number
  @type status_path: string or None
  @param status_path: status file to read; defaults to the proc status
      file of C{pid}
  @rtype: bool
  @return: True if the signal is in the C{SigCgt} set; False if it isn't
      or if the status file can't be read because it doesn't exist
  @raise RuntimeError: if the status file has no C{SigCgt} field

  """
  if status_path is None:
    status_path = _GetProcStatusPath(pid)

  try:
    proc_status = utils_io.ReadFile(status_path)
  except EnvironmentError:
    # In at least one case, reading /proc/$pid/status failed with ESRCH.
    gone_errnos = (errno.ENOENT, errno.ENOTDIR, errno.EINVAL, errno.ESRCH)
    if sys.exc_info()[1].errno in gone_errnos:
      return False
    raise

  caught = _GetProcStatusField(proc_status, "SigCgt")
  if caught is None:
    raise RuntimeError("%s is missing 'SigCgt' field" % status_path)

  # Now check whether signal is handled
  return signum in _ParseSigsetT(caught)
838

    
839

    
840
def Daemonize(logfile):
  """Daemonize the current process.

  This detaches the current process from the controlling terminal and
  runs it in the background as a daemon.

  Only the second child returns from this function. The original process
  waits on a pipe for an error message and exits with status 1 if it
  received one, 0 otherwise; the first child exits right after forking.

  @type logfile: str
  @param logfile: the logfile to which we should redirect stdout/stderr
  @rtype: tuple; (int, callable)
  @return: File descriptor of pipe(2) which must be closed to notify parent
    process and a callable to reopen log files

  """
  # pylint: disable=W0212
  # yes, we really want os._exit

  # TODO: do another attempt to merge Daemonize and StartDaemon, or at
  # least abstract the pipe functionality between them

  # Create pipe for sending error messages
  (rpipe, wpipe) = os.pipe()

  # this might fail
  if os.fork() != 0:
    # Original process
    utils_wrapper.CloseFdNoError(wpipe)
    # Wait for daemon to be started (or an error message to
    # arrive) and read up to 100 KB as an error message
    errormsg = utils_wrapper.RetryOnSignal(os.read, rpipe, 100 * 1024)
    if errormsg:
      sys.stderr.write("Error when starting daemon process: %r\n" % errormsg)
      rcode = 1
    else:
      rcode = 0
    os._exit(rcode) # Exit parent of the first child.

  # The first child.
  SetupDaemonEnv()

  # this might fail
  if os.fork() != 0:
    os._exit(0) # Exit parent (the first child) of the second child.

  # The second child.
  utils_wrapper.CloseFdNoError(rpipe)

  reopen_fn = compat.partial(SetupDaemonFDs, logfile, None)

  # Open logs for the first time
  reopen_fn()

  return (wpipe, reopen_fn)
892

    
893

    
894
def KillProcess(pid, signal_=signal.SIGTERM, timeout=30,
                waitpid=False):
  """Kill a process given by its pid.

  @type pid: int
  @param pid: The PID to terminate.
  @type signal_: int
  @param signal_: The signal to send, by default SIGTERM
  @type timeout: int
  @param timeout: The timeout after which, if the process is still alive,
                  a SIGKILL will be sent. If not positive, no such checking
                  will be done
  @type waitpid: boolean
  @param waitpid: If true, we should waitpid on this process after
      sending signals, since it's our own child and otherwise it
      would remain as zombie
  @raise errors.ProgrammerError: if the pid is not positive

  """
  def _SendSignal(signum):
    """Sends a signal and, if requested, collects the child without blocking.

    """
    delivered = utils_wrapper.IgnoreProcessNotFound(os.kill, pid, signum)
    if delivered and waitpid:
      try:
        os.waitpid(pid, os.WNOHANG)
      except OSError:
        pass

  def _CheckProcess():
    """Returns once the process is gone, asks for a retry otherwise.

    """
    if not IsProcessAlive(pid):
      return

    try:
      (reaped_pid, _) = os.waitpid(pid, os.WNOHANG)
    except OSError:
      raise utils_retry.RetryAgain()

    if reaped_pid <= 0:
      raise utils_retry.RetryAgain()

  if pid <= 0:
    # kill with pid=0 == suicide
    raise errors.ProgrammerError("Invalid pid given '%s'" % pid)

  if not IsProcessAlive(pid):
    return

  _SendSignal(signal_)

  if timeout <= 0:
    return

  try:
    # Wait up to $timeout seconds
    utils_retry.Retry(_CheckProcess, (0.01, 1.5, 0.1), timeout)
  except utils_retry.RetryTimeout:
    pass

  if IsProcessAlive(pid):
    # Kill process if it's still alive
    _SendSignal(signal.SIGKILL)
955

    
956

    
957
def RunInSeparateProcess(fn, *args):
  """Runs a function in a separate process.

  The function's result is converted to a boolean and passed back to the
  calling process as the child's exit code.

  Note: Only boolean return values are supported.

  @type fn: callable
  @param fn: Function to be called
  @param args: positional arguments passed to C{fn}
  @rtype: bool
  @return: Function's result
  @raise errors.GenericError: if the child was terminated by a signal or
      exited with a code other than 0 or 1

  """
  pid = os.fork()
  if pid == 0:
    # Child process
    try:
      # In case the function uses temporary files
      utils_wrapper.ResetTempfileModule()

      # Call function
      result = int(bool(fn(*args)))
      assert result in (0, 1)
    except: # pylint: disable=W0702
      logging.exception("Error while calling function in separate process")
      # 0 and 1 are reserved for the return value
      result = 33

    os._exit(result) # pylint: disable=W0212

  # Parent process

  # Avoid zombies and check exit code
  (_, status) = os.waitpid(pid, 0)

  if os.WIFSIGNALED(status):
    (exitcode, signum) = (None, os.WTERMSIG(status))
  else:
    (exitcode, signum) = (os.WEXITSTATUS(status), None)

  if signum is not None or exitcode not in (0, 1):
    raise errors.GenericError("Child program failed (code=%s, signal=%s)" %
                              (exitcode, signum))

  return exitcode == 1
1002

    
1003

    
1004
def CloseFDs(noclose_fds=None):
  """Close file descriptors.

  This closes all file descriptors above 2 (i.e. except
  stdin/out/err).

  The upper bound is the hard limit for open files; if that limit is
  unlimited, C{SC_OPEN_MAX} is used instead, with 1024 as last resort.

  @type noclose_fds: list or None
  @param noclose_fds: if given, it denotes a list of file descriptor
      that should not be closed

  """
  # Default maximum for the number of available file descriptors.
  fallback_max = 1024
  if "SC_OPEN_MAX" in os.sysconf_names:
    try:
      fallback_max = os.sysconf("SC_OPEN_MAX")
    except OSError:
      fallback_max = 1024
    else:
      if fallback_max < 0:
        fallback_max = 1024

  maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
  if maxfd == resource.RLIM_INFINITY:
    maxfd = fallback_max

  # Iterate through and close all file descriptors (except the standard ones)
  for fd in range(3, maxfd):
    if not (noclose_fds and fd in noclose_fds):
      utils_wrapper.CloseFdNoError(fd)