Statistics
| Branch: | Tag: | Revision:

root / lib / utils / process.py @ 110f49ef

History | View | Annotate | Download (28.2 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Utility functions for processes.
22

23
"""
24

    
25

    
26
import os
27
import sys
28
import subprocess
29
import errno
30
import select
31
import logging
32
import signal
33
import resource
34

    
35
from cStringIO import StringIO
36

    
37
from ganeti import errors
38
from ganeti import constants
39
from ganeti import compat
40

    
41
from ganeti.utils import retry as utils_retry
42
from ganeti.utils import wrapper as utils_wrapper
43
from ganeti.utils import text as utils_text
44
from ganeti.utils import io as utils_io
45
from ganeti.utils import algo as utils_algo
46

    
47

    
48
#: Module-wide switch; once set to True, L{RunCmd} and L{StartDaemon}
#: refuse to run (see L{DisableFork})
_no_fork = False

# Action that had to be taken against a child which exceeded its timeout:
# nothing, SIGTERM was sent, or SIGKILL was sent after the linger period
_TIMEOUT_NONE, _TIMEOUT_TERM, _TIMEOUT_KILL = range(3)
54

    
55

    
56
def DisableFork():
  """Turn off process spawning for this module.

  Sets the module-level C{_no_fork} flag; functions checking it (such as
  L{RunCmd} and L{StartDaemon}) raise L{errors.ProgrammerError} afterwards.

  """
  # Rebinding the module-level flag is intentional
  # pylint: disable-msg=W0603
  global _no_fork
  _no_fork = True
63

    
64

    
65
class RunResult(object):
  """Holds the result of running external programs.

  @type exit_code: int or None
  @ivar exit_code: the exit code of the program, or None (if the program
      didn't exit())
  @type signal: int or None
  @ivar signal: the signal that caused the program to finish, or None
      (if the program wasn't terminated by a signal)
  @type stdout: str
  @ivar stdout: the standard output of the program
  @type stderr: str
  @ivar stderr: the standard error of the program
  @type failed: boolean
  @ivar failed: True in case the program was
      terminated by a signal or exited with a non-zero exit code
  @type fail_reason: str or None
  @ivar fail_reason: a string detailing the termination reason, or None if
      the program did not fail
  @ivar cmd: the command, as given to the constructor
  @ivar output: read-only property returning stdout followed by stderr

  """
  __slots__ = ["exit_code", "signal", "stdout", "stderr",
               "failed", "fail_reason", "cmd"]

  def __init__(self, exit_code, signal_, stdout, stderr, cmd, timeout_action,
               timeout):
    """Stores the outcome of a command and derives the failure information.

    @param exit_code: exit code of the program or None
    @param signal_: number of the terminating signal or None
    @param stdout: captured standard output
    @param stderr: captured standard error
    @param cmd: the command which was run
    @param timeout_action: one of the C{_TIMEOUT_*} constants
    @param timeout: the timeout which was in effect; only formatted into the
        failure reason if C{timeout_action} indicates it was exceeded

    """
    self.cmd = cmd
    self.exit_code = exit_code
    self.signal = signal_
    self.stdout = stdout
    self.stderr = stderr
    self.failed = (signal_ is not None or exit_code != 0)

    fail_msgs = []
    if self.signal is not None:
      fail_msgs.append("terminated by signal %s" % self.signal)
    elif self.exit_code is not None:
      fail_msgs.append("exited with exit code %s" % self.exit_code)
    else:
      fail_msgs.append("unable to determine termination reason")

    if timeout_action == _TIMEOUT_TERM:
      fail_msgs.append("terminated after timeout of %.2f seconds" % timeout)
    elif timeout_action == _TIMEOUT_KILL:
      fail_msgs.append(("force termination after timeout of %.2f seconds"
                        " and linger for another %.2f seconds") %
                       (timeout, constants.CHILD_LINGER_TIMEOUT))

    if fail_msgs and self.failed:
      self.fail_reason = utils_text.CommaJoin(fail_msgs)
    else:
      # The class uses __slots__, so an unassigned attribute would raise
      # AttributeError when read; always give it a value
      self.fail_reason = None

    if self.failed:
      logging.debug("Command '%s' failed (%s); output: %s",
                    self.cmd, self.fail_reason, self.output)

  def _GetOutput(self):
    """Returns the combined stdout and stderr for easier usage.

    """
    return self.stdout + self.stderr

  output = property(_GetOutput, None, None, "Return full output")
126

    
127

    
128
def _BuildCmdEnvironment(env, reset):
  """Assemble the environment dictionary for an external program.

  @type env: dict or None
  @param env: variables to add on top of the base environment
  @type reset: boolean
  @param reset: if true, start from an empty environment; otherwise start
      from a copy of the current process environment with C{LC_ALL} forced
      to C{"C"}
  @rtype: dict
  @return: the environment to pass to the child

  """
  if not reset:
    result = dict(os.environ)
    result["LC_ALL"] = "C"
  else:
    result = {}

  # Caller-provided values are applied last and hence take precedence
  if env is not None:
    result.update(env)

  return result
142

    
143

    
144
def RunCmd(cmd, env=None, output=None, cwd="/", reset_env=False,
145
           interactive=False, timeout=None, noclose_fds=None):
146
  """Execute a (shell) command.
147

148
  The command should not read from its standard input, as it will be
149
  closed.
150

151
  @type cmd: string or list
152
  @param cmd: Command to run
153
  @type env: dict
154
  @param env: Additional environment variables
155
  @type output: str
156
  @param output: if desired, the output of the command can be
157
      saved in a file instead of the RunResult instance; this
158
      parameter denotes the file name (if not None)
159
  @type cwd: string
160
  @param cwd: if specified, will be used as the working
161
      directory for the command; the default will be /
162
  @type reset_env: boolean
163
  @param reset_env: whether to reset or keep the default os environment
164
  @type interactive: boolean
165
  @param interactive: weather we pipe stdin, stdout and stderr
166
                      (default behaviour) or run the command interactive
167
  @type timeout: int
168
  @param timeout: If not None, timeout in seconds until child process gets
169
                  killed
170
  @type noclose_fds: list
171
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
172
                      open for the child process
173
  @rtype: L{RunResult}
174
  @return: RunResult instance
175
  @raise errors.ProgrammerError: if we call this when forks are disabled
176

177
  """
178
  if _no_fork:
179
    raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled")
180

    
181
  if output and interactive:
182
    raise errors.ProgrammerError("Parameters 'output' and 'interactive' can"
183
                                 " not be provided at the same time")
184

    
185
  if isinstance(cmd, basestring):
186
    strcmd = cmd
187
    shell = True
188
  else:
189
    cmd = [str(val) for val in cmd]
190
    strcmd = utils_text.ShellQuoteArgs(cmd)
191
    shell = False
192

    
193
  if output:
194
    logging.debug("RunCmd %s, output file '%s'", strcmd, output)
195
  else:
196
    logging.debug("RunCmd %s", strcmd)
197

    
198
  cmd_env = _BuildCmdEnvironment(env, reset_env)
199

    
200
  try:
201
    if output is None:
202
      out, err, status, timeout_action = _RunCmdPipe(cmd, cmd_env, shell, cwd,
203
                                                     interactive, timeout,
204
                                                     noclose_fds)
205
    else:
206
      timeout_action = _TIMEOUT_NONE
207
      status = _RunCmdFile(cmd, cmd_env, shell, output, cwd, noclose_fds)
208
      out = err = ""
209
  except OSError, err:
210
    if err.errno == errno.ENOENT:
211
      raise errors.OpExecError("Can't execute '%s': not found (%s)" %
212
                               (strcmd, err))
213
    else:
214
      raise
215

    
216
  if status >= 0:
217
    exitcode = status
218
    signal_ = None
219
  else:
220
    exitcode = None
221
    signal_ = -status
222

    
223
  return RunResult(exitcode, signal_, out, err, strcmd, timeout_action, timeout)
224

    
225

    
226
def SetupDaemonEnv(cwd="/", umask=077):
227
  """Setup a daemon's environment.
228

229
  This should be called between the first and second fork, due to
230
  setsid usage.
231

232
  @param cwd: the directory to which to chdir
233
  @param umask: the umask to setup
234

235
  """
236
  os.chdir(cwd)
237
  os.umask(umask)
238
  os.setsid()
239

    
240

    
241
def SetupDaemonFDs(output_file, output_fd):
242
  """Setups up a daemon's file descriptors.
243

244
  @param output_file: if not None, the file to which to redirect
245
      stdout/stderr
246
  @param output_fd: if not None, the file descriptor for stdout/stderr
247

248
  """
249
  # check that at most one is defined
250
  assert [output_file, output_fd].count(None) >= 1
251

    
252
  # Open /dev/null (read-only, only for stdin)
253
  devnull_fd = os.open(os.devnull, os.O_RDONLY)
254

    
255
  output_close = True
256

    
257
  if output_fd is not None:
258
    output_close = False
259
  elif output_file is not None:
260
    # Open output file
261
    try:
262
      output_fd = os.open(output_file,
263
                          os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600)
264
    except EnvironmentError, err:
265
      raise Exception("Opening output file failed: %s" % err)
266
  else:
267
    output_fd = os.open(os.devnull, os.O_WRONLY)
268

    
269
  # Redirect standard I/O
270
  os.dup2(devnull_fd, 0)
271
  os.dup2(output_fd, 1)
272
  os.dup2(output_fd, 2)
273

    
274
  if devnull_fd > 2:
275
    utils_wrapper.CloseFdNoError(devnull_fd)
276

    
277
  if output_close and output_fd > 2:
278
    utils_wrapper.CloseFdNoError(output_fd)
279

    
280

    
281
def StartDaemon(cmd, env=None, cwd="/", output=None, output_fd=None,
                pidfile=None):
  """Start a daemon process after forking twice.

  The calling process forks once and then waits on two pipes: one on which
  the child reports an error message and one on which the daemon's PID is
  sent back.

  @type cmd: string or list
  @param cmd: Command to run; a string is wrapped into C{/bin/sh -c}
  @type env: dict
  @param env: Additional environment variables
  @type cwd: string
  @param cwd: Working directory for the program
  @type output: string
  @param output: Path to file in which to save the output
  @type output_fd: int
  @param output_fd: File descriptor for output
  @type pidfile: string
  @param pidfile: Process ID file
  @rtype: int
  @return: Daemon process ID
  @raise errors.ProgrammerError: if we call this when forks are disabled,
      or if both C{output} and C{output_fd} are given
  @raise errors.OpExecError: if the child reported an error or the PID
      received from it can't be parsed

  """
  if _no_fork:
    raise errors.ProgrammerError("utils.StartDaemon() called with fork()"
                                 " disabled")

  # The condition is true only if both 'output' and 'output_fd' were given
  if output and not (bool(output) ^ (output_fd is not None)):
    raise errors.ProgrammerError("Only one of 'output' and 'output_fd' can be"
                                 " specified")

  if isinstance(cmd, basestring):
    cmd = ["/bin/sh", "-c", cmd]

  strcmd = utils_text.ShellQuoteArgs(cmd)

  if output:
    logging.debug("StartDaemon %s, output file '%s'", strcmd, output)
  else:
    logging.debug("StartDaemon %s", strcmd)

  # The daemon always inherits the current environment (no reset)
  cmd_env = _BuildCmdEnvironment(env, False)

  # Create pipe for sending PID back
  (pidpipe_read, pidpipe_write) = os.pipe()
  try:
    try:
      # Create pipe for sending error messages
      (errpipe_read, errpipe_write) = os.pipe()
      try:
        try:
          # First fork
          pid = os.fork()
          if pid == 0:
            try:
              # Child process, won't return
              _StartDaemonChild(errpipe_read, errpipe_write,
                                pidpipe_read, pidpipe_write,
                                cmd, cmd_env, cwd,
                                output, output_fd, pidfile)
            finally:
              # Well, maybe child process failed
              os._exit(1) # pylint: disable-msg=W0212
        finally:
          # Parent: close our copy of the write end before reading below
          utils_wrapper.CloseFdNoError(errpipe_write)

        # Wait for daemon to be started (or an error message to
        # arrive) and read up to 100 KB as an error message
        errormsg = utils_wrapper.RetryOnSignal(os.read, errpipe_read,
                                               100 * 1024)
      finally:
        utils_wrapper.CloseFdNoError(errpipe_read)
    finally:
      utils_wrapper.CloseFdNoError(pidpipe_write)

    # Read up to 128 bytes for PID
    pidtext = utils_wrapper.RetryOnSignal(os.read, pidpipe_read, 128)
  finally:
    utils_wrapper.CloseFdNoError(pidpipe_read)

  # Try to avoid zombies by waiting for child process
  try:
    os.waitpid(pid, 0)
  except OSError:
    pass

  # Anything received on the error pipe means the daemon wasn't started
  if errormsg:
    raise errors.OpExecError("Error when starting daemon process: %r" %
                             errormsg)

  try:
    return int(pidtext)
  except (ValueError, TypeError), err:
    raise errors.OpExecError("Error while trying to parse PID %r: %s" %
                             (pidtext, err))
374

    
375

    
376
def _StartDaemonChild(errpipe_read, errpipe_write,
                      pidpipe_read, pidpipe_write,
                      args, env, cwd,
                      output, fd_output, pidfile):
  """Child process for starting daemon.

  Runs in the first child of L{StartDaemon}: sets up the daemon
  environment, forks a second time and replaces the second child with the
  requested program. This function never returns; every code path ends in
  C{os._exit} or a successful C{exec}.

  @param errpipe_read: read end of the error pipe (closed here)
  @param errpipe_write: write end of the error pipe, used to report
      failures to the original process
  @param pidpipe_read: read end of the PID pipe (closed here)
  @param pidpipe_write: write end of the PID pipe, receives the daemon's PID
  @param args: argument list of the program; C{args[0]} is executed
  @param env: environment for the program, or None to keep the current one
  @param cwd: working directory for the program
  @param output: path of the output file or None, passed to
      L{SetupDaemonFDs}
  @param fd_output: output file descriptor or None, passed to
      L{SetupDaemonFDs}
  @param pidfile: path of the PID file or None

  """
  try:
    # Close parent's side
    utils_wrapper.CloseFdNoError(errpipe_read)
    utils_wrapper.CloseFdNoError(pidpipe_read)

    # First child process
    SetupDaemonEnv()

    # And fork for the second time
    pid = os.fork()
    if pid != 0:
      # Exit first child process
      os._exit(0) # pylint: disable-msg=W0212

    # Make sure pipe is closed on execv* (and thereby notifies
    # original process)
    utils_wrapper.SetCloseOnExecFlag(errpipe_write, True)

    # List of file descriptors to be left open
    noclose_fds = [errpipe_write]

    # Open PID file
    if pidfile:
      fd_pidfile = utils_io.WritePidFile(pidfile)

      # Keeping the file open to hold the lock
      noclose_fds.append(fd_pidfile)

      # The PID file descriptor has to survive the exec below
      utils_wrapper.SetCloseOnExecFlag(fd_pidfile, False)
    else:
      fd_pidfile = None

    SetupDaemonFDs(output, fd_output)

    # Send daemon PID to parent
    utils_wrapper.RetryOnSignal(os.write, pidpipe_write, str(os.getpid()))

    # Close all file descriptors except stdio and error message pipe
    CloseFDs(noclose_fds=noclose_fds)

    # Change working directory
    os.chdir(cwd)

    if env is None:
      os.execvp(args[0], args)
    else:
      os.execvpe(args[0], args, env)
  except: # pylint: disable-msg=W0702
    try:
      # Report errors to original process
      WriteErrorToFD(errpipe_write, str(sys.exc_info()[1]))
    except: # pylint: disable-msg=W0702
      # Ignore errors in error handling
      pass

  # Only reached if something above failed
  os._exit(1) # pylint: disable-msg=W0212
439

    
440

    
441
def WriteErrorToFD(fd, err):
  """Write an error message to a file descriptor, if one is given.

  @type fd: None or int (file descriptor)
  @param fd: descriptor to write to; nothing is done if it is None
  @param err: string, the error message; an empty message is replaced by
      a generic placeholder

  """
  if fd is not None:
    message = err or "<unknown error>"
    utils_wrapper.RetryOnSignal(os.write, fd, message)
456

    
457

    
458
def _CheckIfAlive(child):
  """Ask for another retry round while the child has not terminated.

  @param child: object whose C{poll()} returns None as long as the process
      is running
  @raise utils_retry.RetryAgain: If child is still alive

  """
  still_running = child.poll() is None
  if still_running:
    raise utils_retry.RetryAgain()
466

    
467

    
468
def _WaitForProcess(child, timeout):
  """Wait until the child has terminated or the timeout is reached.

  @param child: process object as accepted by L{_CheckIfAlive}
  @param timeout: maximum time to wait; negative values are treated as zero

  """
  # Delay specification, handed to utils_retry.Retry as-is
  delay = (1.0, 1.2, 5.0)
  wait_limit = max(0, timeout)

  try:
    utils_retry.Retry(_CheckIfAlive, delay, wait_limit, args=[child])
  except utils_retry.RetryTimeout:
    # Reaching the timeout is not an error here
    pass
477

    
478

    
479
def _RunCmdPipe(cmd, env, via_shell, cwd, interactive, timeout, noclose_fds,
                _linger_timeout=constants.CHILD_LINGER_TIMEOUT):
  """Run a command and return its output.

  Unless running interactively, the child's stdout and stderr are collected
  through non-blocking pipes using C{poll}. If a timeout is given and it
  expires, the child is sent SIGTERM; if it is still alive after the linger
  timeout, it is sent SIGKILL.

  @type  cmd: string or list
  @param cmd: Command to run
  @type env: dict
  @param env: The environment to use
  @type via_shell: bool
  @param via_shell: if we should run via the shell
  @type cwd: string
  @param cwd: the working directory for the program
  @type interactive: boolean
  @param interactive: Run command interactive (without piping)
  @type timeout: int
  @param timeout: Timeout after which the program gets terminated, or None
      for no timeout
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @param _linger_timeout: time to wait after SIGTERM before sending SIGKILL
  @rtype: tuple
  @return: (out, err, status, timeout_action); C{timeout_action} is one of
      the C{_TIMEOUT_*} constants

  """
  poller = select.poll()

  stderr = subprocess.PIPE
  stdout = subprocess.PIPE
  stdin = subprocess.PIPE

  # Interactive commands use the standard descriptors of this process
  if interactive:
    stderr = stdout = stdin = None

  # Descriptors are either all closed by Popen itself or, if some have to
  # stay open, selectively by CloseFDs in the child before exec
  if noclose_fds:
    preexec_fn = lambda: CloseFDs(noclose_fds)
    close_fds = False
  else:
    preexec_fn = None
    close_fds = True

  child = subprocess.Popen(cmd, shell=via_shell,
                           stderr=stderr,
                           stdout=stdout,
                           stdin=stdin,
                           close_fds=close_fds, env=env,
                           cwd=cwd,
                           preexec_fn=preexec_fn)

  out = StringIO()
  err = StringIO()

  # Set to a callable once SIGTERM has been sent from the polling loop
  linger_timeout = None

  # NOTE(review): "Remaining" is presumably a callable returning the time
  # left, negative once expired -- confirm against utils_algo
  if timeout is None:
    poll_timeout = None
  else:
    poll_timeout = utils_algo.RunningTimeout(timeout, True).Remaining

  msg_timeout = ("Command %s (%d) run into execution timeout, terminating" %
                 (cmd, child.pid))
  msg_linger = ("Command %s (%d) run into linger timeout, killing" %
                (cmd, child.pid))

  timeout_action = _TIMEOUT_NONE

  if not interactive:
    child.stdin.close()
    poller.register(child.stdout, select.POLLIN)
    poller.register(child.stderr, select.POLLIN)
    # Maps a file descriptor to its output buffer and pipe file object
    fdmap = {
      child.stdout.fileno(): (out, child.stdout),
      child.stderr.fileno(): (err, child.stderr),
      }
    for fd in fdmap:
      utils_wrapper.SetNonblockFlag(fd, True)

    # Loop until both pipes are done or the linger timeout has expired
    while fdmap:
      if poll_timeout:
        # Scaled by 1000 before being handed to poll()
        pt = poll_timeout() * 1000
        if pt < 0:
          if linger_timeout is None:
            # Execution timeout expired for the first time
            logging.warning(msg_timeout)
            if child.poll() is None:
              timeout_action = _TIMEOUT_TERM
              utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid,
                                                  signal.SIGTERM)
            linger_timeout = \
              utils_algo.RunningTimeout(_linger_timeout, True).Remaining
          pt = linger_timeout() * 1000
          if pt < 0:
            break
      else:
        pt = None

      pollresult = utils_wrapper.RetryOnSignal(poller.poll, pt)

      for fd, event in pollresult:
        if event & select.POLLIN or event & select.POLLPRI:
          data = fdmap[fd][1].read()
          # no data from read signifies EOF (the same as POLLHUP)
          if not data:
            poller.unregister(fd)
            del fdmap[fd]
            continue
          fdmap[fd][0].write(data)
        if (event & select.POLLNVAL or event & select.POLLHUP or
            event & select.POLLERR):
          poller.unregister(fd)
          del fdmap[fd]

  if timeout is not None:
    assert callable(poll_timeout)

    # We have no I/O left but it might still run
    if child.poll() is None:
      _WaitForProcess(child, poll_timeout())

    # Terminate if still alive after timeout
    if child.poll() is None:
      if linger_timeout is None:
        logging.warning(msg_timeout)
        timeout_action = _TIMEOUT_TERM
        utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGTERM)
        lt = _linger_timeout
      else:
        # SIGTERM was already sent from the loop above; wait for the rest
        lt = linger_timeout()
      _WaitForProcess(child, lt)

    # Okay, still alive after timeout and linger timeout? Kill it!
    if child.poll() is None:
      timeout_action = _TIMEOUT_KILL
      logging.warning(msg_linger)
      utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGKILL)

  out = out.getvalue()
  err = err.getvalue()

  status = child.wait()
  return out, err, status, timeout_action
617

    
618

    
619
def _RunCmdFile(cmd, env, via_shell, output, cwd, noclose_fds):
  """Run a command, appending its stdout and stderr to a file.

  @type  cmd: string or list
  @param cmd: Command to run
  @type env: dict
  @param env: The environment to use
  @type via_shell: bool
  @param via_shell: if we should run via the shell
  @type output: str
  @param output: the filename in which to save the output; it is opened in
      append mode
  @type cwd: string
  @param cwd: the working directory for the program
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @rtype: int
  @return: the exit status

  """
  fh = open(output, "a")

  if not noclose_fds:
    close_fds = True
    preexec_fn = None
  else:
    # The output file itself must stay open in the child as well
    close_fds = False
    preexec_fn = lambda: CloseFDs(noclose_fds + [fh.fileno()])

  try:
    child = subprocess.Popen(cmd,
                             stdin=subprocess.PIPE,
                             stdout=fh,
                             stderr=subprocess.STDOUT,
                             shell=via_shell,
                             env=env,
                             cwd=cwd,
                             close_fds=close_fds,
                             preexec_fn=preexec_fn)

    # The command is not supposed to read any input
    child.stdin.close()
    return child.wait()
  finally:
    fh.close()
662

    
663

    
664
def RunParts(dir_name, env=None, reset_env=False):
665
  """Run Scripts or programs in a directory
666

667
  @type dir_name: string
668
  @param dir_name: absolute path to a directory
669
  @type env: dict
670
  @param env: The environment to use
671
  @type reset_env: boolean
672
  @param reset_env: whether to reset or keep the default os environment
673
  @rtype: list of tuples
674
  @return: list of (name, (one of RUNDIR_STATUS), RunResult)
675

676
  """
677
  rr = []
678

    
679
  try:
680
    dir_contents = utils_io.ListVisibleFiles(dir_name)
681
  except OSError, err:
682
    logging.warning("RunParts: skipping %s (cannot list: %s)", dir_name, err)
683
    return rr
684

    
685
  for relname in sorted(dir_contents):
686
    fname = utils_io.PathJoin(dir_name, relname)
687
    if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
688
            constants.EXT_PLUGIN_MASK.match(relname) is not None):
689
      rr.append((relname, constants.RUNPARTS_SKIP, None))
690
    else:
691
      try:
692
        result = RunCmd([fname], env=env, reset_env=reset_env)
693
      except Exception, err: # pylint: disable-msg=W0703
694
        rr.append((relname, constants.RUNPARTS_ERR, str(err)))
695
      else:
696
        rr.append((relname, constants.RUNPARTS_RUN, result))
697

    
698
  return rr
699

    
700

    
701
def _GetProcStatusPath(pid):
  """Build the name of the proc status file of a process.

  @type pid: int
  @param pid: Process ID
  @rtype: string
  @return: path of the form C{/proc/<pid>/status}

  """
  return "/proc/%d/status" % (pid, )
710

    
711

    
712
def IsProcessAlive(pid):
713
  """Check if a given pid exists on the system.
714

715
  @note: zombie status is not handled, so zombie processes
716
      will be returned as alive
717
  @type pid: int
718
  @param pid: the process ID to check
719
  @rtype: boolean
720
  @return: True if the process exists
721

722
  """
723
  def _TryStat(name):
724
    try:
725
      os.stat(name)
726
      return True
727
    except EnvironmentError, err:
728
      if err.errno in (errno.ENOENT, errno.ENOTDIR):
729
        return False
730
      elif err.errno == errno.EINVAL:
731
        raise utils_retry.RetryAgain(err)
732
      raise
733

    
734
  assert isinstance(pid, int), "pid must be an integer"
735
  if pid <= 0:
736
    return False
737

    
738
  # /proc in a multiprocessor environment can have strange behaviors.
739
  # Retry the os.stat a few times until we get a good result.
740
  try:
741
    return utils_retry.Retry(_TryStat, (0.01, 1.5, 0.1), 0.5,
742
                             args=[_GetProcStatusPath(pid)])
743
  except utils_retry.RetryTimeout, err:
744
    err.RaiseInner()
745

    
746

    
747
def _ParseSigsetT(sigset):
  """Parse a rendered sigset_t value.

  This is the opposite of the Linux kernel's fs/proc/array.c:render_sigset_t
  function. The rightmost hexadecimal digit describes signals 1 to 4, the
  one to its left signals 5 to 8 and so on.

  @type sigset: string
  @param sigset: Rendered signal set from /proc/$pid/status
  @rtype: set
  @return: Set of all enabled signal numbers

  """
  signals = set()

  for (idx, digit) in enumerate(reversed(sigset)):
    nibble = int(digit, 16)
    # Number of the signal preceding the four covered by this digit
    base = idx * 4

    for bit in range(4):
      if nibble & (1 << bit):
        signals.add(base + bit + 1)

  return signals
779

    
780

    
781
def _GetProcStatusField(pstatus, field):
  """Look up a field in the contents of a proc status file.

  Every line is split at its first colon; the first line whose name part
  equals the requested field provides the value.

  @type pstatus: string
  @param pstatus: Contents of /proc/$pid/status
  @type field: string
  @param field: Name of field whose value should be returned
  @rtype: string
  @return: the value with surrounding whitespace removed, or None if the
      field is not present

  """
  for entry in pstatus.splitlines():
    pieces = entry.split(":", 1)

    if len(pieces) == 2 and pieces[0] == field:
      return pieces[1].strip()

  return None
800

    
801

    
802
def IsProcessHandlingSignal(pid, signum, status_path=None):
803
  """Checks whether a process is handling a signal.
804

805
  @type pid: int
806
  @param pid: Process ID
807
  @type signum: int
808
  @param signum: Signal number
809
  @rtype: bool
810

811
  """
812
  if status_path is None:
813
    status_path = _GetProcStatusPath(pid)
814

    
815
  try:
816
    proc_status = utils_io.ReadFile(status_path)
817
  except EnvironmentError, err:
818
    # In at least one case, reading /proc/$pid/status failed with ESRCH.
819
    if err.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL, errno.ESRCH):
820
      return False
821
    raise
822

    
823
  sigcgt = _GetProcStatusField(proc_status, "SigCgt")
824
  if sigcgt is None:
825
    raise RuntimeError("%s is missing 'SigCgt' field" % status_path)
826

    
827
  # Now check whether signal is handled
828
  return signum in _ParseSigsetT(sigcgt)
829

    
830

    
831
def Daemonize(logfile):
  """Daemonize the current process.

  This detaches the current process from the controlling terminal and
  runs it in the background as a daemon.

  Only the second child returns from this function. The original process
  waits on a pipe for an error message and exits with code 1 if it
  received one and with code 0 otherwise; the first child exits right
  after the second fork.

  @type logfile: str
  @param logfile: the logfile to which we should redirect stdout/stderr
  @rtype: tuple; (int, callable)
  @return: File descriptor of pipe(2) which must be closed to notify parent
    process and a callable to reopen log files

  """
  # pylint: disable-msg=W0212
  # yes, we really want os._exit

  # TODO: do another attempt to merge Daemonize and StartDaemon, or at
  # least abstract the pipe functionality between them

  # Create pipe for sending error messages
  (rpipe, wpipe) = os.pipe()

  # this might fail
  pid = os.fork()
  if (pid == 0):  # The first child.
    SetupDaemonEnv()

    # this might fail
    pid = os.fork() # Fork a second child.
    if (pid == 0):  # The second child.
      # The daemon only needs the write end of the pipe
      utils_wrapper.CloseFdNoError(rpipe)
    else:
      # exit() or _exit()?  See below.
      os._exit(0) # Exit parent (the first child) of the second child.
  else:
    # Original process: keep only the read end
    utils_wrapper.CloseFdNoError(wpipe)
    # Wait for daemon to be started (or an error message to
    # arrive) and read up to 100 KB as an error message
    errormsg = utils_wrapper.RetryOnSignal(os.read, rpipe, 100 * 1024)
    if errormsg:
      sys.stderr.write("Error when starting daemon process: %r\n" % errormsg)
      rcode = 1
    else:
      rcode = 0
    os._exit(rcode) # Exit parent of the first child.

  # Only the second child gets here
  reopen_fn = compat.partial(SetupDaemonFDs, logfile, None)

  # Open logs for the first time
  reopen_fn()

  return (wpipe, reopen_fn)
883

    
884

    
885
def KillProcess(pid, signal_=signal.SIGTERM, timeout=30,
                waitpid=False):
  """Kill a process given by its pid.

  Sends the given signal and, if a positive timeout was given, waits for
  the process to disappear; if it is still alive afterwards, SIGKILL is
  sent. Nothing is done if the process is not alive to begin with.

  @type pid: int
  @param pid: The PID to terminate.
  @type signal_: int
  @param signal_: The signal to send, by default SIGTERM
  @type timeout: int
  @param timeout: The timeout after which, if the process is still alive,
                  a SIGKILL will be sent. If not positive, no such checking
                  will be done
  @type waitpid: boolean
  @param waitpid: If true, we should waitpid on this process after
      sending signals, since it's our own child and otherwise it
      would remain as zombie
  @raise errors.ProgrammerError: if the pid is not positive

  """
  def _helper(pid, signal_, wait):
    """Simple helper to encapsulate the kill/waitpid sequence"""
    # NOTE(review): IgnoreProcessNotFound presumably returns a false value
    # if the process didn't exist -- confirm against utils_wrapper
    if utils_wrapper.IgnoreProcessNotFound(os.kill, pid, signal_) and wait:
      try:
        # Non-blocking; errors from waitpid are ignored
        os.waitpid(pid, os.WNOHANG)
      except OSError:
        pass

  if pid <= 0:
    # kill with pid=0 == suicide
    raise errors.ProgrammerError("Invalid pid given '%s'" % pid)

  if not IsProcessAlive(pid):
    return

  _helper(pid, signal_, waitpid)

  if timeout <= 0:
    return

  def _CheckProcess():
    """Returns once the process is gone, requests a retry otherwise"""
    if not IsProcessAlive(pid):
      return

    try:
      (result_pid, _) = os.waitpid(pid, os.WNOHANG)
    except OSError:
      # waitpid failed but the process is still alive; check again later
      raise utils_retry.RetryAgain()

    # A positive PID means the process has been reaped
    if result_pid > 0:
      return

    raise utils_retry.RetryAgain()

  try:
    # Wait up to $timeout seconds
    utils_retry.Retry(_CheckProcess, (0.01, 1.5, 0.1), timeout)
  except utils_retry.RetryTimeout:
    pass

  if IsProcessAlive(pid):
    # Kill process if it's still alive
    _helper(pid, signal.SIGKILL, waitpid)
946

    
947

    
948
def RunInSeparateProcess(fn, *args):
  """Runs a function in a separate process.

  The function is called in a forked child; its result is converted to a
  boolean and handed back to the parent as the child's exit code.

  Note: Only boolean return values are supported.

  @type fn: callable
  @param fn: Function to be called
  @param args: positional arguments passed to C{fn}
  @rtype: bool
  @return: Function's result
  @raise errors.GenericError: if the child was terminated by a signal or
      exited with a code other than 0 or 1 (e.g. because C{fn} raised an
      exception)

  """
  pid = os.fork()
  if pid == 0:
    # Child process
    try:
      # In case the function uses temporary files
      utils_wrapper.ResetTempfileModule()

      # Call function
      result = int(bool(fn(*args)))
      assert result in (0, 1)
    except: # pylint: disable-msg=W0702
      logging.exception("Error while calling function in separate process")
      # 0 and 1 are reserved for the return value
      result = 33

    # The child must never return into the caller's code
    os._exit(result) # pylint: disable-msg=W0212

  # Parent process

  # Avoid zombies and check exit code
  (_, status) = os.waitpid(pid, 0)

  if os.WIFSIGNALED(status):
    exitcode = None
    signum = os.WTERMSIG(status)
  else:
    exitcode = os.WEXITSTATUS(status)
    signum = None

  if not (exitcode in (0, 1) and signum is None):
    raise errors.GenericError("Child program failed (code=%s, signal=%s)" %
                              (exitcode, signum))

  return bool(exitcode)
993

    
994

    
995
def CloseFDs(noclose_fds=None):
  """Close file descriptors.

  This closes all file descriptors above 2 (i.e. except
  stdin/out/err).

  The upper bound is the hard limit of C{RLIMIT_NOFILE}; if that limit is
  unlimited, C{SC_OPEN_MAX} is used, with 1024 as the last fallback.

  @type noclose_fds: list or None
  @param noclose_fds: if given, it denotes a list of file descriptor
      that should not be closed

  """
  # Default maximum for the number of available file descriptors.
  if 'SC_OPEN_MAX' in os.sysconf_names:
    try:
      MAXFD = os.sysconf('SC_OPEN_MAX')
      if MAXFD < 0:
        MAXFD = 1024
    except OSError:
      MAXFD = 1024
  else:
    MAXFD = 1024

  maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
  if (maxfd == resource.RLIM_INFINITY):
    maxfd = MAXFD

  # Build the lookup once; the loop below can cover a very large number of
  # descriptors and a list membership test per descriptor would be linear
  keep_open = frozenset(noclose_fds or ())

  # Iterate through and close all file descriptors (except the standard ones)
  for fd in range(3, maxfd):
    if fd in keep_open:
      continue
    utils_wrapper.CloseFdNoError(fd)