Statistics
| Branch: | Tag: | Revision:

root / lib / utils / process.py @ 638ac34b

History | View | Annotate | Download (28 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Utility functions for processes.
22

23
"""
24

    
25

    
26
import os
27
import sys
28
import subprocess
29
import errno
30
import select
31
import logging
32
import signal
33
import resource
34

    
35
from cStringIO import StringIO
36

    
37
from ganeti import errors
38
from ganeti import constants
39

    
40
from ganeti.utils import retry as utils_retry
41
from ganeti.utils import wrapper as utils_wrapper
42
from ganeti.utils import text as utils_text
43
from ganeti.utils import io as utils_io
44
from ganeti.utils import algo as utils_algo
45

    
46

    
47
#: While True, L{RunCmd} and L{StartDaemon} refuse to spawn processes
_no_fork = False

#: Timeout handling states reported by L{_RunCmdPipe}: nothing was done,
#: SIGTERM was sent after the timeout, SIGKILL was sent after lingering
_TIMEOUT_NONE, _TIMEOUT_TERM, _TIMEOUT_KILL = range(3)
53

    
54

    
55
def DisableFork():
  """Forbids any further use of fork(2) through this module.

  Once called, L{RunCmd} and L{StartDaemon} raise
  L{errors.ProgrammerError} instead of starting a process.

  """
  # The flag lives at module level, hence the global statement
  global _no_fork # pylint: disable-msg=W0603

  _no_fork = True
62

    
63

    
64
class RunResult(object):
  """Holds the result of running external programs.

  @type exit_code: int or None
  @ivar exit_code: the exit code of the program, or None (if the program
      didn't exit())
  @type signal: int or None
  @ivar signal: the signal that caused the program to finish, or None
      (if the program wasn't terminated by a signal)
  @type stdout: str
  @ivar stdout: the standard output of the program
  @type stderr: str
  @ivar stderr: the standard error of the program
  @type failed: boolean
  @ivar failed: True in case the program was
      terminated by a signal or exited with a non-zero exit code
  @type fail_reason: str or None
  @ivar fail_reason: a string detailing the termination reason, or None
      if the program did not fail
  @type cmd: str
  @ivar cmd: the command as passed in by the caller (used for logging)

  """
  __slots__ = ["exit_code", "signal", "stdout", "stderr",
               "failed", "fail_reason", "cmd"]

  def __init__(self, exit_code, signal_, stdout, stderr, cmd, timeout_action,
               timeout):
    """Stores the outcome of a command and derives the failure details.

    @type exit_code: int or None
    @param exit_code: exit code, None if terminated by a signal
    @type signal_: int or None
    @param signal_: terminating signal, None if the program exited
    @type stdout: str
    @param stdout: captured standard output
    @type stderr: str
    @param stderr: captured standard error
    @param cmd: the command that was run
    @param timeout_action: one of the C{_TIMEOUT_*} module constants
    @param timeout: the timeout in seconds that was in effect; only used
        for the message when C{timeout_action} reports a timeout

    """
    self.cmd = cmd
    self.exit_code = exit_code
    self.signal = signal_
    self.stdout = stdout
    self.stderr = stderr
    self.failed = (signal_ is not None or exit_code != 0)

    fail_msgs = []
    if self.signal is not None:
      fail_msgs.append("terminated by signal %s" % self.signal)
    elif self.exit_code is not None:
      fail_msgs.append("exited with exit code %s" % self.exit_code)
    else:
      fail_msgs.append("unable to determine termination reason")

    if timeout_action == _TIMEOUT_TERM:
      fail_msgs.append("terminated after timeout of %.2f seconds" % timeout)
    elif timeout_action == _TIMEOUT_KILL:
      fail_msgs.append(("force termination after timeout of %.2f seconds"
                        " and linger for another %.2f seconds") %
                       (timeout, constants.CHILD_LINGER_TIMEOUT))

    # Because of __slots__ an unassigned attribute raises AttributeError on
    # access, so always give fail_reason a value, even on success
    if fail_msgs and self.failed:
      self.fail_reason = utils_text.CommaJoin(fail_msgs)
    else:
      self.fail_reason = None

    if self.failed:
      logging.debug("Command '%s' failed (%s); output: %s",
                    self.cmd, self.fail_reason, self.output)

  def _GetOutput(self):
    """Returns the combined stdout and stderr for easier usage.

    Standard output comes first, followed by standard error; the two are
    concatenated, not interleaved.

    """
    return self.stdout + self.stderr

  output = property(_GetOutput, None, None, "Return full output")
125

    
126

    
127
def _BuildCmdEnvironment(env, reset):
  """Builds the environment for an external program.

  @type env: dict or None
  @param env: variables added on top of the base environment
  @type reset: boolean
  @param reset: if True start from an empty environment, otherwise from a
      copy of C{os.environ} with C{LC_ALL} forced to C{"C"}
  @rtype: dict
  @return: the environment to hand to the child process

  """
  result = {}

  if not reset:
    result.update(os.environ)
    result["LC_ALL"] = "C"

  # Caller-supplied values are applied last and can override LC_ALL too
  if env is not None:
    result.update(env)

  return result
141

    
142

    
143
def RunCmd(cmd, env=None, output=None, cwd="/", reset_env=False,
144
           interactive=False, timeout=None, noclose_fds=None):
145
  """Execute a (shell) command.
146

147
  The command should not read from its standard input, as it will be
148
  closed.
149

150
  @type cmd: string or list
151
  @param cmd: Command to run
152
  @type env: dict
153
  @param env: Additional environment variables
154
  @type output: str
155
  @param output: if desired, the output of the command can be
156
      saved in a file instead of the RunResult instance; this
157
      parameter denotes the file name (if not None)
158
  @type cwd: string
159
  @param cwd: if specified, will be used as the working
160
      directory for the command; the default will be /
161
  @type reset_env: boolean
162
  @param reset_env: whether to reset or keep the default os environment
163
  @type interactive: boolean
164
  @param interactive: weather we pipe stdin, stdout and stderr
165
                      (default behaviour) or run the command interactive
166
  @type timeout: int
167
  @param timeout: If not None, timeout in seconds until child process gets
168
                  killed
169
  @type noclose_fds: list
170
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
171
                      open for the child process
172
  @rtype: L{RunResult}
173
  @return: RunResult instance
174
  @raise errors.ProgrammerError: if we call this when forks are disabled
175

176
  """
177
  if _no_fork:
178
    raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled")
179

    
180
  if output and interactive:
181
    raise errors.ProgrammerError("Parameters 'output' and 'interactive' can"
182
                                 " not be provided at the same time")
183

    
184
  if isinstance(cmd, basestring):
185
    strcmd = cmd
186
    shell = True
187
  else:
188
    cmd = [str(val) for val in cmd]
189
    strcmd = utils_text.ShellQuoteArgs(cmd)
190
    shell = False
191

    
192
  if output:
193
    logging.debug("RunCmd %s, output file '%s'", strcmd, output)
194
  else:
195
    logging.debug("RunCmd %s", strcmd)
196

    
197
  cmd_env = _BuildCmdEnvironment(env, reset_env)
198

    
199
  try:
200
    if output is None:
201
      out, err, status, timeout_action = _RunCmdPipe(cmd, cmd_env, shell, cwd,
202
                                                     interactive, timeout,
203
                                                     noclose_fds)
204
    else:
205
      timeout_action = _TIMEOUT_NONE
206
      status = _RunCmdFile(cmd, cmd_env, shell, output, cwd, noclose_fds)
207
      out = err = ""
208
  except OSError, err:
209
    if err.errno == errno.ENOENT:
210
      raise errors.OpExecError("Can't execute '%s': not found (%s)" %
211
                               (strcmd, err))
212
    else:
213
      raise
214

    
215
  if status >= 0:
216
    exitcode = status
217
    signal_ = None
218
  else:
219
    exitcode = None
220
    signal_ = -status
221

    
222
  return RunResult(exitcode, signal_, out, err, strcmd, timeout_action, timeout)
223

    
224

    
225
def SetupDaemonEnv(cwd="/", umask=077):
226
  """Setup a daemon's environment.
227

228
  This should be called between the first and second fork, due to
229
  setsid usage.
230

231
  @param cwd: the directory to which to chdir
232
  @param umask: the umask to setup
233

234
  """
235
  os.chdir(cwd)
236
  os.umask(umask)
237
  os.setsid()
238

    
239

    
240
def SetupDaemonFDs(output_file, output_fd):
241
  """Setups up a daemon's file descriptors.
242

243
  @param output_file: if not None, the file to which to redirect
244
      stdout/stderr
245
  @param output_fd: if not None, the file descriptor for stdout/stderr
246

247
  """
248
  # check that at most one is defined
249
  assert [output_file, output_fd].count(None) >= 1
250

    
251
  # Open /dev/null (read-only, only for stdin)
252
  devnull_fd = os.open(os.devnull, os.O_RDONLY)
253

    
254
  output_close = True
255

    
256
  if output_fd is not None:
257
    output_close = False
258
  elif output_file is not None:
259
    # Open output file
260
    try:
261
      output_fd = os.open(output_file,
262
                          os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600)
263
    except EnvironmentError, err:
264
      raise Exception("Opening output file failed: %s" % err)
265
  else:
266
    output_fd = os.open(os.devnull, os.O_WRONLY)
267

    
268
  # Redirect standard I/O
269
  os.dup2(devnull_fd, 0)
270
  os.dup2(output_fd, 1)
271
  os.dup2(output_fd, 2)
272

    
273
  if devnull_fd > 2:
274
    utils_wrapper.CloseFdNoError(devnull_fd)
275

    
276
  if output_close and output_fd > 2:
277
    utils_wrapper.CloseFdNoError(output_fd)
278

    
279

    
280
def StartDaemon(cmd, env=None, cwd="/", output=None, output_fd=None,
281
                pidfile=None):
282
  """Start a daemon process after forking twice.
283

284
  @type cmd: string or list
285
  @param cmd: Command to run
286
  @type env: dict
287
  @param env: Additional environment variables
288
  @type cwd: string
289
  @param cwd: Working directory for the program
290
  @type output: string
291
  @param output: Path to file in which to save the output
292
  @type output_fd: int
293
  @param output_fd: File descriptor for output
294
  @type pidfile: string
295
  @param pidfile: Process ID file
296
  @rtype: int
297
  @return: Daemon process ID
298
  @raise errors.ProgrammerError: if we call this when forks are disabled
299

300
  """
301
  if _no_fork:
302
    raise errors.ProgrammerError("utils.StartDaemon() called with fork()"
303
                                 " disabled")
304

    
305
  if output and not (bool(output) ^ (output_fd is not None)):
306
    raise errors.ProgrammerError("Only one of 'output' and 'output_fd' can be"
307
                                 " specified")
308

    
309
  if isinstance(cmd, basestring):
310
    cmd = ["/bin/sh", "-c", cmd]
311

    
312
  strcmd = utils_text.ShellQuoteArgs(cmd)
313

    
314
  if output:
315
    logging.debug("StartDaemon %s, output file '%s'", strcmd, output)
316
  else:
317
    logging.debug("StartDaemon %s", strcmd)
318

    
319
  cmd_env = _BuildCmdEnvironment(env, False)
320

    
321
  # Create pipe for sending PID back
322
  (pidpipe_read, pidpipe_write) = os.pipe()
323
  try:
324
    try:
325
      # Create pipe for sending error messages
326
      (errpipe_read, errpipe_write) = os.pipe()
327
      try:
328
        try:
329
          # First fork
330
          pid = os.fork()
331
          if pid == 0:
332
            try:
333
              # Child process, won't return
334
              _StartDaemonChild(errpipe_read, errpipe_write,
335
                                pidpipe_read, pidpipe_write,
336
                                cmd, cmd_env, cwd,
337
                                output, output_fd, pidfile)
338
            finally:
339
              # Well, maybe child process failed
340
              os._exit(1) # pylint: disable-msg=W0212
341
        finally:
342
          utils_wrapper.CloseFdNoError(errpipe_write)
343

    
344
        # Wait for daemon to be started (or an error message to
345
        # arrive) and read up to 100 KB as an error message
346
        errormsg = utils_wrapper.RetryOnSignal(os.read, errpipe_read,
347
                                               100 * 1024)
348
      finally:
349
        utils_wrapper.CloseFdNoError(errpipe_read)
350
    finally:
351
      utils_wrapper.CloseFdNoError(pidpipe_write)
352

    
353
    # Read up to 128 bytes for PID
354
    pidtext = utils_wrapper.RetryOnSignal(os.read, pidpipe_read, 128)
355
  finally:
356
    utils_wrapper.CloseFdNoError(pidpipe_read)
357

    
358
  # Try to avoid zombies by waiting for child process
359
  try:
360
    os.waitpid(pid, 0)
361
  except OSError:
362
    pass
363

    
364
  if errormsg:
365
    raise errors.OpExecError("Error when starting daemon process: %r" %
366
                             errormsg)
367

    
368
  try:
369
    return int(pidtext)
370
  except (ValueError, TypeError), err:
371
    raise errors.OpExecError("Error while trying to parse PID %r: %s" %
372
                             (pidtext, err))
373

    
374

    
375
def _StartDaemonChild(errpipe_read, errpipe_write,
                      pidpipe_read, pidpipe_write,
                      args, env, cwd,
                      output, fd_output, pidfile):
  """Child process for starting daemon.

  Runs in the process created by the first fork of L{StartDaemon}, forks
  once more and replaces the resulting process with the daemon program.
  This function never returns: every path ends in C{os._exit} or in a
  successful C{exec}.

  @param errpipe_read: read end of the error pipe (closed here)
  @param errpipe_write: write end of the error pipe; receives the text of
      any exception raised during setup
  @param pidpipe_read: read end of the PID pipe (closed here)
  @param pidpipe_write: write end of the PID pipe; receives the daemon PID
  @param args: program and arguments to execute
  @param env: environment for the program, or None to inherit
  @param cwd: working directory for the program
  @param output: path of the output file, passed to L{SetupDaemonFDs}
  @param fd_output: output file descriptor, passed to L{SetupDaemonFDs}
  @param pidfile: path of the PID file, or a false value for none

  """
  try:
    # The reading ends belong to the original process
    for parent_fd in (errpipe_read, pidpipe_read):
      utils_wrapper.CloseFdNoError(parent_fd)

    # First child process
    SetupDaemonEnv()

    # Second fork; the intermediate process exits right away
    if os.fork() != 0:
      os._exit(0) # pylint: disable-msg=W0212

    # Make sure pipe is closed on execv* (and thereby notifies
    # original process)
    utils_wrapper.SetCloseOnExecFlag(errpipe_write, True)

    # File descriptors which must survive the mass close below
    keep_open = [errpipe_write]

    if pidfile:
      pidfile_fd = utils_io.WritePidFile(pidfile)

      # Keeping the file open to hold the lock
      keep_open.append(pidfile_fd)

      utils_wrapper.SetCloseOnExecFlag(pidfile_fd, False)

    SetupDaemonFDs(output, fd_output)

    # Send daemon PID to parent
    utils_wrapper.RetryOnSignal(os.write, pidpipe_write, str(os.getpid()))

    # Close all file descriptors except stdio and error message pipe
    CloseFDs(noclose_fds=keep_open)

    # Change working directory
    os.chdir(cwd)

    if env is not None:
      os.execvpe(args[0], args, env)
    else:
      os.execvp(args[0], args)
  except: # pylint: disable-msg=W0702
    try:
      # Report errors to original process
      WriteErrorToFD(errpipe_write, str(sys.exc_info()[1]))
    except: # pylint: disable-msg=W0702
      # Ignore errors in error handling
      pass

  os._exit(1) # pylint: disable-msg=W0212
438

    
439

    
440
def WriteErrorToFD(fd, err):
  """Possibly write an error message to a fd.

  @type fd: None or int (file descriptor)
  @param fd: if not None, the error will be written to this fd
  @type err: string
  @param err: the error message; an empty message is replaced by the
      placeholder C{"<unknown error>"}

  """
  if fd is not None:
    # "err or ..." substitutes the placeholder for an empty message
    utils_wrapper.RetryOnSignal(os.write, fd, err or "<unknown error>")
455

    
456

    
457
def _CheckIfAlive(child):
  """Raises L{utils_retry.RetryAgain} if child is still alive.

  @param child: object with a C{poll} method returning None while the
      process is running (e.g. a C{subprocess.Popen} instance)
  @raise utils_retry.RetryAgain: If child is still alive

  """
  still_running = child.poll() is None
  if still_running:
    raise utils_retry.RetryAgain()
465

    
466

    
467
def _WaitForProcess(child, timeout):
  """Waits for the child to terminate or until we reach timeout.

  Returns silently in both cases; callers have to poll the child again to
  find out whether it has terminated.

  @param child: the child process, as accepted by L{_CheckIfAlive}
  @type timeout: number
  @param timeout: maximum time to wait in seconds; negative values are
      treated as zero

  """
  wait_time = max(0, timeout)
  try:
    utils_retry.Retry(_CheckIfAlive, (1.0, 1.2, 5.0), wait_time,
                      args=[child])
  except utils_retry.RetryTimeout:
    # Running into the timeout is an expected outcome here
    pass
476

    
477

    
478
def _RunCmdPipe(cmd, env, via_shell, cwd, interactive, timeout, noclose_fds,
                _linger_timeout=constants.CHILD_LINGER_TIMEOUT):
  """Run a command and return its output.

  If a timeout is given and expires, the child is sent SIGTERM; if it is
  still alive after the linger timeout it is sent SIGKILL.

  @type  cmd: string or list
  @param cmd: Command to run
  @type env: dict
  @param env: The environment to use
  @type via_shell: bool
  @param via_shell: if we should run via the shell
  @type cwd: string
  @param cwd: the working directory for the program
  @type interactive: boolean
  @param interactive: Run command interactive (without piping)
  @type timeout: int
  @param timeout: Timeout in seconds after which the program gets
                  terminated, or None for no timeout
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @param _linger_timeout: seconds granted to the child between SIGTERM and
                          SIGKILL
  @rtype: tuple
  @return: (out, err, status, timeout_action); timeout_action is one of
           the C{_TIMEOUT_*} module constants

  """
  poller = select.poll()

  # Either all three standard streams are piped or none of them is
  if interactive:
    stdio = None
  else:
    stdio = subprocess.PIPE

  if noclose_fds:
    # Close everything but the requested descriptors ourselves
    preexec_fn = lambda: CloseFDs(noclose_fds)
    close_fds = False
  else:
    preexec_fn = None
    close_fds = True

  child = subprocess.Popen(cmd, shell=via_shell,
                           stderr=stdio,
                           stdout=stdio,
                           stdin=stdio,
                           close_fds=close_fds, env=env,
                           cwd=cwd,
                           preexec_fn=preexec_fn)

  out = StringIO()
  err = StringIO()

  # Becomes a callable once the execution timeout has been hit
  linger_timeout = None

  if timeout is None:
    poll_timeout = None
  else:
    poll_timeout = utils_algo.RunningTimeout(timeout, True).Remaining

  msg_timeout = ("Command %s (%d) run into execution timeout, terminating" %
                 (cmd, child.pid))
  msg_linger = ("Command %s (%d) run into linger timeout, killing" %
                (cmd, child.pid))

  timeout_action = _TIMEOUT_NONE

  if not interactive:
    child.stdin.close()
    poller.register(child.stdout, select.POLLIN)
    poller.register(child.stderr, select.POLLIN)

    # Maps each pipe's descriptor to (collecting buffer, pipe object)
    fdmap = {
      child.stdout.fileno(): (out, child.stdout),
      child.stderr.fileno(): (err, child.stderr),
      }
    for fd in fdmap:
      utils_wrapper.SetNonblockFlag(fd, True)

    readable_mask = select.POLLIN | select.POLLPRI
    closed_mask = select.POLLNVAL | select.POLLHUP | select.POLLERR

    while fdmap:
      if poll_timeout:
        pt = poll_timeout() * 1000
        if pt < 0:
          # Execution timeout is over; from now on the linger timeout rules
          if linger_timeout is None:
            logging.warning(msg_timeout)
            if child.poll() is None:
              timeout_action = _TIMEOUT_TERM
              utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid,
                                                  signal.SIGTERM)
            linger_timeout = \
              utils_algo.RunningTimeout(_linger_timeout, True).Remaining
          pt = linger_timeout() * 1000
          if pt < 0:
            break
      else:
        pt = None

      for (fd, event) in utils_wrapper.RetryOnSignal(poller.poll, pt):
        if event & readable_mask:
          (sink, pipe) = fdmap[fd]
          data = pipe.read()
          # no data from read signifies EOF (the same as POLLHUP)
          if not data:
            poller.unregister(fd)
            del fdmap[fd]
            continue
          sink.write(data)
        if event & closed_mask:
          poller.unregister(fd)
          del fdmap[fd]

  if timeout is not None:
    assert callable(poll_timeout)

    # We have no I/O left but it might still run
    if child.poll() is None:
      _WaitForProcess(child, poll_timeout())

    # Terminate if still alive after timeout
    if child.poll() is None:
      if linger_timeout is not None:
        # SIGTERM handling already started in the polling loop
        lt = linger_timeout()
      else:
        logging.warning(msg_timeout)
        timeout_action = _TIMEOUT_TERM
        utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGTERM)
        lt = _linger_timeout
      _WaitForProcess(child, lt)

    # Okay, still alive after timeout and linger timeout? Kill it!
    if child.poll() is None:
      timeout_action = _TIMEOUT_KILL
      logging.warning(msg_linger)
      utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGKILL)

  out = out.getvalue()
  err = err.getvalue()

  status = child.wait()
  return out, err, status, timeout_action
616

    
617

    
618
def _RunCmdFile(cmd, env, via_shell, output, cwd, noclose_fds):
  """Run a command and save its output to a file.

  Standard output and standard error both go to the file, which is opened
  in append mode. The command's standard input is closed right after the
  start.

  @type  cmd: string or list
  @param cmd: Command to run
  @type env: dict
  @param env: The environment to use
  @type via_shell: bool
  @param via_shell: if we should run via the shell
  @type output: str
  @param output: the filename in which to save the output
  @type cwd: string
  @param cwd: the working directory for the program
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @rtype: int
  @return: the exit status

  """
  logfile = open(output, "a")

  if noclose_fds:
    # The output file has to stay open in the child as well
    preexec_fn = lambda: CloseFDs(noclose_fds + [logfile.fileno()])
    close_fds = False
  else:
    preexec_fn = None
    close_fds = True

  try:
    child = subprocess.Popen(cmd, shell=via_shell,
                             stderr=subprocess.STDOUT,
                             stdout=logfile,
                             stdin=subprocess.PIPE,
                             close_fds=close_fds, env=env,
                             cwd=cwd,
                             preexec_fn=preexec_fn)

    child.stdin.close()
    return child.wait()
  finally:
    logfile.close()
661

    
662

    
663
def RunParts(dir_name, env=None, reset_env=False):
664
  """Run Scripts or programs in a directory
665

666
  @type dir_name: string
667
  @param dir_name: absolute path to a directory
668
  @type env: dict
669
  @param env: The environment to use
670
  @type reset_env: boolean
671
  @param reset_env: whether to reset or keep the default os environment
672
  @rtype: list of tuples
673
  @return: list of (name, (one of RUNDIR_STATUS), RunResult)
674

675
  """
676
  rr = []
677

    
678
  try:
679
    dir_contents = utils_io.ListVisibleFiles(dir_name)
680
  except OSError, err:
681
    logging.warning("RunParts: skipping %s (cannot list: %s)", dir_name, err)
682
    return rr
683

    
684
  for relname in sorted(dir_contents):
685
    fname = utils_io.PathJoin(dir_name, relname)
686
    if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
687
            constants.EXT_PLUGIN_MASK.match(relname) is not None):
688
      rr.append((relname, constants.RUNPARTS_SKIP, None))
689
    else:
690
      try:
691
        result = RunCmd([fname], env=env, reset_env=reset_env)
692
      except Exception, err: # pylint: disable-msg=W0703
693
        rr.append((relname, constants.RUNPARTS_ERR, str(err)))
694
      else:
695
        rr.append((relname, constants.RUNPARTS_RUN, result))
696

    
697
  return rr
698

    
699

    
700
def _GetProcStatusPath(pid):
  """Returns the path for a PID's proc status file.

  @type pid: int
  @param pid: Process ID
  @rtype: string
  @return: C{/proc/<pid>/status}

  """
  status_path = "/proc/%d/status" % pid
  return status_path
709

    
710

    
711
def IsProcessAlive(pid):
712
  """Check if a given pid exists on the system.
713

714
  @note: zombie status is not handled, so zombie processes
715
      will be returned as alive
716
  @type pid: int
717
  @param pid: the process ID to check
718
  @rtype: boolean
719
  @return: True if the process exists
720

721
  """
722
  def _TryStat(name):
723
    try:
724
      os.stat(name)
725
      return True
726
    except EnvironmentError, err:
727
      if err.errno in (errno.ENOENT, errno.ENOTDIR):
728
        return False
729
      elif err.errno == errno.EINVAL:
730
        raise utils_retry.RetryAgain(err)
731
      raise
732

    
733
  assert isinstance(pid, int), "pid must be an integer"
734
  if pid <= 0:
735
    return False
736

    
737
  # /proc in a multiprocessor environment can have strange behaviors.
738
  # Retry the os.stat a few times until we get a good result.
739
  try:
740
    return utils_retry.Retry(_TryStat, (0.01, 1.5, 0.1), 0.5,
741
                             args=[_GetProcStatusPath(pid)])
742
  except utils_retry.RetryTimeout, err:
743
    err.RaiseInner()
744

    
745

    
746
def _ParseSigsetT(sigset):
  """Parse a rendered sigset_t value.

  This is the opposite of the Linux kernel's fs/proc/array.c:render_sigset_t
  function.

  The rightmost hexadecimal digit describes signals 1 to 4, the one to its
  left signals 5 to 8 and so on; within a digit the lowest bit stands for
  the lowest signal number.

  @type sigset: string
  @param sigset: Rendered signal set from /proc/$pid/status
  @rtype: set
  @return: Set of all enabled signal numbers
  @raise ValueError: if a character is not a hexadecimal digit

  """
  enabled = set()

  for (position, digit) in enumerate(reversed(sigset)):
    nibble = int(digit, 16)
    # Signals are numbered from 1, each digit covers four of them
    first_signal = 4 * position + 1

    for bit in range(4):
      if nibble & (1 << bit):
        enabled.add(first_signal + bit)

  return enabled
778

    
779

    
780
def _GetProcStatusField(pstatus, field):
  """Retrieves a field from the contents of a proc status file.

  Lines have the form C{name: value}; the value of the first line whose
  name equals C{field} exactly is returned, stripped of surrounding
  whitespace.

  @type pstatus: string
  @param pstatus: Contents of /proc/$pid/status
  @type field: string
  @param field: Name of field whose value should be returned
  @rtype: string or None
  @return: the field's value, None if there is no such field

  """
  for line in pstatus.splitlines():
    # Split at the first colon only, the value may contain more of them
    pieces = line.split(":", 1)

    if len(pieces) == 2 and pieces[0] == field:
      return pieces[1].strip()

  return None
799

    
800

    
801
def IsProcessHandlingSignal(pid, signum, status_path=None):
802
  """Checks whether a process is handling a signal.
803

804
  @type pid: int
805
  @param pid: Process ID
806
  @type signum: int
807
  @param signum: Signal number
808
  @rtype: bool
809

810
  """
811
  if status_path is None:
812
    status_path = _GetProcStatusPath(pid)
813

    
814
  try:
815
    proc_status = utils_io.ReadFile(status_path)
816
  except EnvironmentError, err:
817
    # In at least one case, reading /proc/$pid/status failed with ESRCH.
818
    if err.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL, errno.ESRCH):
819
      return False
820
    raise
821

    
822
  sigcgt = _GetProcStatusField(proc_status, "SigCgt")
823
  if sigcgt is None:
824
    raise RuntimeError("%s is missing 'SigCgt' field" % status_path)
825

    
826
  # Now check whether signal is handled
827
  return signum in _ParseSigsetT(sigcgt)
828

    
829

    
830
def Daemonize(logfile):
  """Daemonize the current process.

  This detaches the current process from the controlling terminal and
  runs it in the background as a daemon.

  Only the process created by the second fork returns from this function.
  The original process waits on an error pipe and exits with status 1 if
  it receives any data on it, and with status 0 otherwise.

  @type logfile: str
  @param logfile: the logfile to which we should redirect stdout/stderr
  @rtype: int
  @return: file descriptor of the write end of the error pipe

  """
  # pylint: disable-msg=W0212
  # yes, we really want os._exit

  # TODO: do another attempt to merge Daemonize and StartDaemon, or at
  # least abstract the pipe functionality between them

  # Create pipe for sending error messages
  (rpipe, wpipe) = os.pipe()

  # this might fail
  pid = os.fork()
  if pid != 0:
    # Original process
    utils_wrapper.CloseFdNoError(wpipe)
    # Wait for daemon to be started (or an error message to
    # arrive) and read up to 100 KB as an error message
    errormsg = utils_wrapper.RetryOnSignal(os.read, rpipe, 100 * 1024)
    if errormsg:
      sys.stderr.write("Error when starting daemon process: %r\n" % errormsg)
      rcode = 1
    else:
      rcode = 0
    os._exit(rcode) # Exit parent of the first child.

  # The first child.
  SetupDaemonEnv()

  # this might fail
  pid = os.fork() # Fork a second child.
  if pid != 0:
    os._exit(0) # Exit parent (the first child) of the second child.

  # The second child.
  utils_wrapper.CloseFdNoError(rpipe)

  SetupDaemonFDs(logfile, None)
  return wpipe
877

    
878

    
879
def KillProcess(pid, signal_=signal.SIGTERM, timeout=30,
                waitpid=False):
  """Kill a process given by its pid.

  @type pid: int
  @param pid: The PID to terminate.
  @type signal_: int
  @param signal_: The signal to send, by default SIGTERM
  @type timeout: int
  @param timeout: The timeout after which, if the process is still alive,
                  a SIGKILL will be sent. If not positive, no such checking
                  will be done
  @type waitpid: boolean
  @param waitpid: If true, we should waitpid on this process after
      sending signals, since it's our own child and otherwise it
      would remain as zombie
  @raise errors.ProgrammerError: if C{pid} is not positive

  """
  def _helper(pid, signal_, wait):
    """Simple helper to encapsulate the kill/waitpid sequence"""
    # presumably true only if the signal was delivered, i.e. the process
    # still existed -- confirm against utils_wrapper.IgnoreProcessNotFound
    delivered = utils_wrapper.IgnoreProcessNotFound(os.kill, pid, signal_)
    if delivered and wait:
      try:
        os.waitpid(pid, os.WNOHANG)
      except OSError:
        pass

  def _CheckProcess():
    """Returns once the process is gone, asks for a retry otherwise"""
    if not IsProcessAlive(pid):
      return

    try:
      (result_pid, _) = os.waitpid(pid, os.WNOHANG)
    except OSError:
      raise utils_retry.RetryAgain()

    if result_pid <= 0:
      raise utils_retry.RetryAgain()

  if pid <= 0:
    # kill with pid=0 == suicide
    raise errors.ProgrammerError("Invalid pid given '%s'" % pid)

  if not IsProcessAlive(pid):
    return

  _helper(pid, signal_, waitpid)

  if timeout <= 0:
    return

  try:
    # Wait up to $timeout seconds
    utils_retry.Retry(_CheckProcess, (0.01, 1.5, 0.1), timeout)
  except utils_retry.RetryTimeout:
    pass

  if IsProcessAlive(pid):
    # Kill process if it's still alive
    _helper(pid, signal.SIGKILL, waitpid)
940

    
941

    
942
def RunInSeparateProcess(fn, *args):
  """Runs a function in a separate process.

  Note: Only boolean return values are supported.

  The result travels back as the child's exit code: 0 for a false value,
  1 for a true one and 33 if the function raised an exception.

  @type fn: callable
  @param fn: Function to be called
  @param args: positional arguments passed to C{fn}
  @rtype: bool
  @return: Function's result
  @raise errors.GenericError: if the child was terminated by a signal or
      exited with a code other than 0 or 1

  """
  pid = os.fork()
  if pid == 0:
    # Child process
    try:
      # In case the function uses temporary files
      utils_wrapper.ResetTempfileModule()

      # Call function
      result = int(bool(fn(*args)))
      assert result in (0, 1)
    except: # pylint: disable-msg=W0702
      logging.exception("Error while calling function in separate process")
      # 0 and 1 are reserved for the return value
      result = 33

    os._exit(result) # pylint: disable-msg=W0212

  # Parent process

  # Avoid zombies and check exit code
  (_, status) = os.waitpid(pid, 0)

  exitcode = None
  signum = None
  if os.WIFSIGNALED(status):
    signum = os.WTERMSIG(status)
  else:
    exitcode = os.WEXITSTATUS(status)

  if signum is not None or exitcode not in (0, 1):
    raise errors.GenericError("Child program failed (code=%s, signal=%s)" %
                              (exitcode, signum))

  return bool(exitcode)
987

    
988

    
989
def CloseFDs(noclose_fds=None):
  """Close file descriptors.

  This closes all file descriptors above 2 (i.e. except
  stdin/out/err).

  The upper bound is the hard limit of C{RLIMIT_NOFILE}; if that limit is
  infinite, C{SC_OPEN_MAX} is used, with 1024 as last resort.

  @type noclose_fds: list or None
  @param noclose_fds: if given, it denotes a list of file descriptor
      that should not be closed

  """
  # Bound used when the resource limit is infinite
  fallback_maxfd = 1024
  if "SC_OPEN_MAX" in os.sysconf_names:
    try:
      sysconf_maxfd = os.sysconf("SC_OPEN_MAX")
    except OSError:
      sysconf_maxfd = -1
    # A negative value does not give a usable bound
    if sysconf_maxfd >= 0:
      fallback_maxfd = sysconf_maxfd

  (_, maxfd) = resource.getrlimit(resource.RLIMIT_NOFILE)
  if maxfd == resource.RLIM_INFINITY:
    maxfd = fallback_maxfd

  # Iterate through and close all file descriptors (except the standard ones)
  for fd in range(3, maxfd):
    keep = noclose_fds and fd in noclose_fds
    if not keep:
      utils_wrapper.CloseFdNoError(fd)