Statistics
| Branch: | Tag: | Revision:

root / lib / utils / process.py @ 7b0bf9cd

History | View | Annotate | Download (27.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Utility functions for processes.
22

23
"""
24

    
25

    
26
import os
27
import sys
28
import subprocess
29
import errno
30
import select
31
import logging
32
import signal
33
import resource
34

    
35
from cStringIO import StringIO
36

    
37
from ganeti import errors
38
from ganeti import constants
39

    
40
from ganeti.utils import retry as utils_retry
41
from ganeti.utils import wrapper as utils_wrapper
42
from ganeti.utils import text as utils_text
43
from ganeti.utils import io as utils_io
44
from ganeti.utils import algo as utils_algo
45

    
46

    
47
#: when set to True, L{RunCmd} and L{StartDaemon} refuse to run and raise
#: L{errors.ProgrammerError} instead
_no_fork = False

# Values for the "timeout_action" reported by L{_RunCmdPipe} and interpreted
# by L{RunResult}: no timeout handling happened, the child was sent SIGTERM
# after the timeout, or the child was sent SIGKILL after the linger timeout
(_TIMEOUT_NONE,
 _TIMEOUT_TERM,
 _TIMEOUT_KILL) = range(3)
53

    
54

    
55
def DisableFork():
  """Forbids spawning new processes through this module.

  Sets the module-level C{_no_fork} flag; L{RunCmd} and L{StartDaemon}
  check that flag and raise L{errors.ProgrammerError} once it is set.

  """
  # pylint: disable-msg=W0603
  global _no_fork
  _no_fork = True
62

    
63

    
64
class RunResult(object):
  """Holds the result of running external programs.

  @type exit_code: int or None
  @ivar exit_code: the exit code of the program, or None (if the program
      didn't exit())
  @type signal: int or None
  @ivar signal: the signal that caused the program to finish, or None
      (if the program wasn't terminated by a signal)
  @type stdout: str
  @ivar stdout: the standard output of the program
  @type stderr: str
  @ivar stderr: the standard error of the program
  @type failed: boolean
  @ivar failed: True in case the program was
      terminated by a signal or exited with a non-zero exit code
  @type fail_reason: str or None
  @ivar fail_reason: a string detailing the termination reason, or None
      if the program did not fail
  @ivar cmd: the command that was run, as given to the constructor
  @ivar output: read-only property, C{stdout} followed by C{stderr}

  """
  __slots__ = ["exit_code", "signal", "stdout", "stderr",
               "failed", "fail_reason", "cmd"]

  def __init__(self, exit_code, signal_, stdout, stderr, cmd, timeout_action,
               timeout):
    """Stores the raw result and derives C{failed} and C{fail_reason}.

    @param exit_code: exit code of the program or None
    @param signal_: number of the terminating signal or None
    @param stdout: captured standard output
    @param stderr: captured standard error
    @param cmd: the command that was run (only used for reporting)
    @param timeout_action: one of C{_TIMEOUT_NONE}, C{_TIMEOUT_TERM} or
        C{_TIMEOUT_KILL}
    @param timeout: the timeout in seconds; only formatted into the failure
        reason when C{timeout_action} is not C{_TIMEOUT_NONE}

    """
    self.cmd = cmd
    self.exit_code = exit_code
    self.signal = signal_
    self.stdout = stdout
    self.stderr = stderr
    self.failed = (signal_ is not None or exit_code != 0)

    fail_msgs = []
    if self.signal is not None:
      fail_msgs.append("terminated by signal %s" % self.signal)
    elif self.exit_code is not None:
      fail_msgs.append("exited with exit code %s" % self.exit_code)
    else:
      fail_msgs.append("unable to determine termination reason")

    if timeout_action == _TIMEOUT_TERM:
      fail_msgs.append("terminated after timeout of %.2f seconds" % timeout)
    elif timeout_action == _TIMEOUT_KILL:
      fail_msgs.append(("force termination after timeout of %.2f seconds"
                        " and linger for another %.2f seconds") %
                       (timeout, constants.CHILD_LINGER_TIMEOUT))

    if fail_msgs and self.failed:
      self.fail_reason = utils_text.CommaJoin(fail_msgs)
    else:
      # The class uses __slots__, hence an unassigned attribute would raise
      # AttributeError on access; always give it a value
      self.fail_reason = None

    if self.failed:
      logging.debug("Command '%s' failed (%s); output: %s",
                    self.cmd, self.fail_reason, self.output)

  def _GetOutput(self):
    """Returns the combined stdout and stderr for easier usage.

    """
    return self.stdout + self.stderr

  output = property(_GetOutput, None, None, "Return full output")
125

    
126

    
127
def _BuildCmdEnvironment(env, reset):
128
  """Builds the environment for an external program.
129

130
  """
131
  if reset:
132
    cmd_env = {}
133
  else:
134
    cmd_env = os.environ.copy()
135
    cmd_env["LC_ALL"] = "C"
136

    
137
  if env is not None:
138
    cmd_env.update(env)
139

    
140
  return cmd_env
141

    
142

    
143
def RunCmd(cmd, env=None, output=None, cwd="/", reset_env=False,
144
           interactive=False, timeout=None, noclose_fds=None):
145
  """Execute a (shell) command.
146

147
  The command should not read from its standard input, as it will be
148
  closed.
149

150
  @type cmd: string or list
151
  @param cmd: Command to run
152
  @type env: dict
153
  @param env: Additional environment variables
154
  @type output: str
155
  @param output: if desired, the output of the command can be
156
      saved in a file instead of the RunResult instance; this
157
      parameter denotes the file name (if not None)
158
  @type cwd: string
159
  @param cwd: if specified, will be used as the working
160
      directory for the command; the default will be /
161
  @type reset_env: boolean
162
  @param reset_env: whether to reset or keep the default os environment
163
  @type interactive: boolean
164
  @param interactive: weather we pipe stdin, stdout and stderr
165
                      (default behaviour) or run the command interactive
166
  @type timeout: int
167
  @param timeout: If not None, timeout in seconds until child process gets
168
                  killed
169
  @type noclose_fds: list
170
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
171
                      open for the child process
172
  @rtype: L{RunResult}
173
  @return: RunResult instance
174
  @raise errors.ProgrammerError: if we call this when forks are disabled
175

176
  """
177
  if _no_fork:
178
    raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled")
179

    
180
  if output and interactive:
181
    raise errors.ProgrammerError("Parameters 'output' and 'interactive' can"
182
                                 " not be provided at the same time")
183

    
184
  if isinstance(cmd, basestring):
185
    strcmd = cmd
186
    shell = True
187
  else:
188
    cmd = [str(val) for val in cmd]
189
    strcmd = utils_text.ShellQuoteArgs(cmd)
190
    shell = False
191

    
192
  if output:
193
    logging.debug("RunCmd %s, output file '%s'", strcmd, output)
194
  else:
195
    logging.debug("RunCmd %s", strcmd)
196

    
197
  cmd_env = _BuildCmdEnvironment(env, reset_env)
198

    
199
  try:
200
    if output is None:
201
      out, err, status, timeout_action = _RunCmdPipe(cmd, cmd_env, shell, cwd,
202
                                                     interactive, timeout,
203
                                                     noclose_fds)
204
    else:
205
      timeout_action = _TIMEOUT_NONE
206
      status = _RunCmdFile(cmd, cmd_env, shell, output, cwd, noclose_fds)
207
      out = err = ""
208
  except OSError, err:
209
    if err.errno == errno.ENOENT:
210
      raise errors.OpExecError("Can't execute '%s': not found (%s)" %
211
                               (strcmd, err))
212
    else:
213
      raise
214

    
215
  if status >= 0:
216
    exitcode = status
217
    signal_ = None
218
  else:
219
    exitcode = None
220
    signal_ = -status
221

    
222
  return RunResult(exitcode, signal_, out, err, strcmd, timeout_action, timeout)
223

    
224

    
225
def SetupDaemonEnv(cwd="/", umask=077):
226
  """Setup a daemon's environment.
227

228
  This should be called between the first and second fork, due to
229
  setsid usage.
230

231
  @param cwd: the directory to which to chdir
232
  @param umask: the umask to setup
233

234
  """
235
  os.chdir(cwd)
236
  os.umask(umask)
237
  os.setsid()
238

    
239

    
240
def SetupDaemonFDs(output_file, output_fd):
241
  """Setups up a daemon's file descriptors.
242

243
  @param output_file: if not None, the file to which to redirect
244
      stdout/stderr
245
  @param output_fd: if not None, the file descriptor for stdout/stderr
246

247
  """
248
  # check that at most one is defined
249
  assert [output_file, output_fd].count(None) >= 1
250

    
251
  # Open /dev/null (read-only, only for stdin)
252
  devnull_fd = os.open(os.devnull, os.O_RDONLY)
253

    
254
  if output_fd is not None:
255
    pass
256
  elif output_file is not None:
257
    # Open output file
258
    try:
259
      output_fd = os.open(output_file,
260
                          os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600)
261
    except EnvironmentError, err:
262
      raise Exception("Opening output file failed: %s" % err)
263
  else:
264
    output_fd = os.open(os.devnull, os.O_WRONLY)
265

    
266
  # Redirect standard I/O
267
  os.dup2(devnull_fd, 0)
268
  os.dup2(output_fd, 1)
269
  os.dup2(output_fd, 2)
270

    
271

    
272
def StartDaemon(cmd, env=None, cwd="/", output=None, output_fd=None,
273
                pidfile=None):
274
  """Start a daemon process after forking twice.
275

276
  @type cmd: string or list
277
  @param cmd: Command to run
278
  @type env: dict
279
  @param env: Additional environment variables
280
  @type cwd: string
281
  @param cwd: Working directory for the program
282
  @type output: string
283
  @param output: Path to file in which to save the output
284
  @type output_fd: int
285
  @param output_fd: File descriptor for output
286
  @type pidfile: string
287
  @param pidfile: Process ID file
288
  @rtype: int
289
  @return: Daemon process ID
290
  @raise errors.ProgrammerError: if we call this when forks are disabled
291

292
  """
293
  if _no_fork:
294
    raise errors.ProgrammerError("utils.StartDaemon() called with fork()"
295
                                 " disabled")
296

    
297
  if output and not (bool(output) ^ (output_fd is not None)):
298
    raise errors.ProgrammerError("Only one of 'output' and 'output_fd' can be"
299
                                 " specified")
300

    
301
  if isinstance(cmd, basestring):
302
    cmd = ["/bin/sh", "-c", cmd]
303

    
304
  strcmd = utils_text.ShellQuoteArgs(cmd)
305

    
306
  if output:
307
    logging.debug("StartDaemon %s, output file '%s'", strcmd, output)
308
  else:
309
    logging.debug("StartDaemon %s", strcmd)
310

    
311
  cmd_env = _BuildCmdEnvironment(env, False)
312

    
313
  # Create pipe for sending PID back
314
  (pidpipe_read, pidpipe_write) = os.pipe()
315
  try:
316
    try:
317
      # Create pipe for sending error messages
318
      (errpipe_read, errpipe_write) = os.pipe()
319
      try:
320
        try:
321
          # First fork
322
          pid = os.fork()
323
          if pid == 0:
324
            try:
325
              # Child process, won't return
326
              _StartDaemonChild(errpipe_read, errpipe_write,
327
                                pidpipe_read, pidpipe_write,
328
                                cmd, cmd_env, cwd,
329
                                output, output_fd, pidfile)
330
            finally:
331
              # Well, maybe child process failed
332
              os._exit(1) # pylint: disable-msg=W0212
333
        finally:
334
          utils_wrapper.CloseFdNoError(errpipe_write)
335

    
336
        # Wait for daemon to be started (or an error message to
337
        # arrive) and read up to 100 KB as an error message
338
        errormsg = utils_wrapper.RetryOnSignal(os.read, errpipe_read,
339
                                               100 * 1024)
340
      finally:
341
        utils_wrapper.CloseFdNoError(errpipe_read)
342
    finally:
343
      utils_wrapper.CloseFdNoError(pidpipe_write)
344

    
345
    # Read up to 128 bytes for PID
346
    pidtext = utils_wrapper.RetryOnSignal(os.read, pidpipe_read, 128)
347
  finally:
348
    utils_wrapper.CloseFdNoError(pidpipe_read)
349

    
350
  # Try to avoid zombies by waiting for child process
351
  try:
352
    os.waitpid(pid, 0)
353
  except OSError:
354
    pass
355

    
356
  if errormsg:
357
    raise errors.OpExecError("Error when starting daemon process: %r" %
358
                             errormsg)
359

    
360
  try:
361
    return int(pidtext)
362
  except (ValueError, TypeError), err:
363
    raise errors.OpExecError("Error while trying to parse PID %r: %s" %
364
                             (pidtext, err))
365

    
366

    
367
def _StartDaemonChild(errpipe_read, errpipe_write,
                      pidpipe_read, pidpipe_write,
                      args, env, cwd,
                      output, fd_output, pidfile):
  """Runs in the first child of L{StartDaemon}; never returns.

  Forks once more, sets up PID file and standard I/O, reports the daemon's
  PID through C{pidpipe_write} and finally replaces the process with the
  requested program. Any exception is reported through C{errpipe_write}
  and the process exits with status 1.

  @param errpipe_read: read end of the error pipe (closed here)
  @param errpipe_write: write end of the error pipe
  @param pidpipe_read: read end of the PID pipe (closed here)
  @param pidpipe_write: write end of the PID pipe
  @param args: program and arguments to execute
  @param env: environment for the program, or None to keep the current one
  @param cwd: working directory for the program
  @param output: path of the output file or None
  @param fd_output: file descriptor for output or None
  @param pidfile: path of the PID file or None

  """
  try:
    # The read ends belong to the parent
    for parent_fd in (errpipe_read, pidpipe_read):
      utils_wrapper.CloseFdNoError(parent_fd)

    # First child process
    SetupDaemonEnv()

    # Second fork; the first child is done afterwards
    if os.fork() != 0:
      os._exit(0) # pylint: disable-msg=W0212

    # Make sure pipe is closed on execv* (and thereby notifies
    # original process)
    utils_wrapper.SetCloseOnExecFlag(errpipe_write, True)

    # File descriptors which must survive CloseFDs below
    keep_fds = [errpipe_write]

    if pidfile:
      pidfile_fd = utils_io.WritePidFile(pidfile)

      # Keeping the file open to hold the lock
      keep_fds.append(pidfile_fd)

      utils_wrapper.SetCloseOnExecFlag(pidfile_fd, False)

    SetupDaemonFDs(output, fd_output)

    # Send daemon PID to parent
    utils_wrapper.RetryOnSignal(os.write, pidpipe_write, str(os.getpid()))

    # Close all file descriptors except stdio and error message pipe
    CloseFDs(noclose_fds=keep_fds)

    # Change working directory
    os.chdir(cwd)

    if env is None:
      os.execvp(args[0], args)
    else:
      os.execvpe(args[0], args, env)
  except: # pylint: disable-msg=W0702
    try:
      # Report errors to original process
      WriteErrorToFD(errpipe_write, str(sys.exc_info()[1]))
    except: # pylint: disable-msg=W0702
      # Ignore errors in error handling
      pass

  # Only reached if something went wrong
  os._exit(1) # pylint: disable-msg=W0212
430

    
431

    
432
def WriteErrorToFD(fd, err):
  """Writes an error message to a file descriptor, if there is one.

  @type fd: None or int (file descriptor)
  @param fd: if not None, the error will be written to this fd
  @param err: string, the error message; an empty message is replaced by
      a generic placeholder

  """
  if fd is None:
    return

  message = err or "<unknown error>"
  utils_wrapper.RetryOnSignal(os.write, fd, message)
447

    
448

    
449
def _CheckIfAlive(child):
  """Retry helper polling a child process.

  @param child: object with a C{poll} method returning None while the
      process is running
  @raises utils_retry.RetryAgain: If child is still alive

  """
  still_running = child.poll() is None
  if still_running:
    raise utils_retry.RetryAgain()
457

    
458

    
459
def _WaitForProcess(child, timeout):
  """Waits until the child has terminated or the timeout has expired.

  Running into the timeout is not an error; callers check the child's
  state themselves afterwards.

  @param child: child process, polled via L{_CheckIfAlive}
  @param timeout: maximum time to wait; negative values are treated as 0

  """
  wait_time = max(0, timeout)

  try:
    utils_retry.Retry(_CheckIfAlive, (1.0, 1.2, 5.0), wait_time,
                      args=[child])
  except utils_retry.RetryTimeout:
    pass
468

    
469

    
470
def _RunCmdPipe(cmd, env, via_shell, cwd, interactive, timeout, noclose_fds,
                _linger_timeout=constants.CHILD_LINGER_TIMEOUT):
  """Run a command and return its output.

  Unless running interactively, stdout and stderr of the child are read
  through pipes while the child runs. If a timeout is given and expires,
  the child is sent SIGTERM; if it is still alive after the linger timeout
  it is sent SIGKILL.

  @type  cmd: string or list
  @param cmd: Command to run
  @type env: dict
  @param env: The environment to use
  @type via_shell: bool
  @param via_shell: if we should run via the shell
  @type cwd: string
  @param cwd: the working directory for the program
  @type interactive: boolean
  @param interactive: Run command interactive (without piping)
  @type timeout: int
  @param timeout: Timeout after which the program gets terminated, or None
      for no timeout
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @param _linger_timeout: time the child is given to exit after SIGTERM
      before it is sent SIGKILL
  @rtype: tuple
  @return: (out, err, status, timeout_action); C{status} is the value
      returned by the child's C{wait()}, C{timeout_action} is one of
      C{_TIMEOUT_NONE}, C{_TIMEOUT_TERM} or C{_TIMEOUT_KILL}

  """
  poller = select.poll()

  stderr = subprocess.PIPE
  stdout = subprocess.PIPE
  stdin = subprocess.PIPE

  if interactive:
    # Let the child use our own standard descriptors
    stderr = stdout = stdin = None

  if noclose_fds:
    # Close everything but the requested descriptors ourselves, in the child
    preexec_fn = lambda: CloseFDs(noclose_fds)
    close_fds = False
  else:
    preexec_fn = None
    close_fds = True

  child = subprocess.Popen(cmd, shell=via_shell,
                           stderr=stderr,
                           stdout=stdout,
                           stdin=stdin,
                           close_fds=close_fds, env=env,
                           cwd=cwd,
                           preexec_fn=preexec_fn)

  out = StringIO()
  err = StringIO()

  # Set to a callable once SIGTERM handling has started in the I/O loop
  linger_timeout = None

  if timeout is None:
    poll_timeout = None
  else:
    # Callable used below to get the time left until the timeout
    poll_timeout = utils_algo.RunningTimeout(timeout, True).Remaining

  msg_timeout = ("Command %s (%d) run into execution timeout, terminating" %
                 (cmd, child.pid))
  msg_linger = ("Command %s (%d) run into linger timeout, killing" %
                (cmd, child.pid))

  timeout_action = _TIMEOUT_NONE

  if not interactive:
    child.stdin.close()
    poller.register(child.stdout, select.POLLIN)
    poller.register(child.stderr, select.POLLIN)
    # Maps file descriptor to (buffer collecting the data, file object)
    fdmap = {
      child.stdout.fileno(): (out, child.stdout),
      child.stderr.fileno(): (err, child.stderr),
      }
    for fd in fdmap:
      utils_wrapper.SetNonblockFlag(fd, True)

    # Loop until both pipes have been closed or the linger timeout expired
    while fdmap:
      if poll_timeout:
        # poll() expects milliseconds
        pt = poll_timeout() * 1000
        if pt < 0:
          # Execution timeout expired
          if linger_timeout is None:
            # First time we notice: ask the child to terminate and start
            # the linger timeout
            logging.warning(msg_timeout)
            if child.poll() is None:
              timeout_action = _TIMEOUT_TERM
              utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid,
                                                  signal.SIGTERM)
            linger_timeout = \
              utils_algo.RunningTimeout(_linger_timeout, True).Remaining
          pt = linger_timeout() * 1000
          if pt < 0:
            # Linger timeout expired too; stop reading, the code below
            # takes care of the child
            break
      else:
        pt = None

      pollresult = utils_wrapper.RetryOnSignal(poller.poll, pt)

      for fd, event in pollresult:
        if event & select.POLLIN or event & select.POLLPRI:
          data = fdmap[fd][1].read()
          # no data from read signifies EOF (the same as POLLHUP)
          if not data:
            poller.unregister(fd)
            del fdmap[fd]
            continue
          fdmap[fd][0].write(data)
        if (event & select.POLLNVAL or event & select.POLLHUP or
            event & select.POLLERR):
          poller.unregister(fd)
          del fdmap[fd]

  if timeout is not None:
    assert callable(poll_timeout)

    # We have no I/O left but it might still run
    if child.poll() is None:
      _WaitForProcess(child, poll_timeout())

    # Terminate if still alive after timeout
    if child.poll() is None:
      if linger_timeout is None:
        # SIGTERM was not sent by the I/O loop above
        logging.warning(msg_timeout)
        timeout_action = _TIMEOUT_TERM
        utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGTERM)
        lt = _linger_timeout
      else:
        # Only wait for what is left of the linger timeout
        lt = linger_timeout()
      _WaitForProcess(child, lt)

    # Okay, still alive after timeout and linger timeout? Kill it!
    if child.poll() is None:
      timeout_action = _TIMEOUT_KILL
      logging.warning(msg_linger)
      utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGKILL)

  out = out.getvalue()
  err = err.getvalue()

  status = child.wait()
  return out, err, status, timeout_action
608

    
609

    
610
def _RunCmdFile(cmd, env, via_shell, output, cwd, noclose_fds):
  """Runs a command, appending its stdout and stderr to a file.

  @type  cmd: string or list
  @param cmd: Command to run
  @type env: dict
  @param env: The environment to use
  @type via_shell: bool
  @param via_shell: if we should run via the shell
  @type output: str
  @param output: the filename in which to save the output
  @type cwd: string
  @param cwd: the working directory for the program
  @type noclose_fds: list
  @param noclose_fds: list of additional (fd >=3) file descriptors to leave
                      open for the child process
  @rtype: int
  @return: the exit status

  """
  logfile = open(output, "a")

  # Either let subprocess close all descriptors or do it ourselves in the
  # child, sparing the requested ones and the output file
  close_fds = not noclose_fds
  if close_fds:
    preexec_fn = None
  else:
    preexec_fn = lambda: CloseFDs(noclose_fds + [logfile.fileno()])

  try:
    child = subprocess.Popen(cmd, shell=via_shell,
                             stderr=subprocess.STDOUT,
                             stdout=logfile,
                             stdin=subprocess.PIPE,
                             close_fds=close_fds, env=env,
                             cwd=cwd,
                             preexec_fn=preexec_fn)

    # The command is not supposed to read any input
    child.stdin.close()
    return child.wait()
  finally:
    logfile.close()
653

    
654

    
655
def RunParts(dir_name, env=None, reset_env=False):
656
  """Run Scripts or programs in a directory
657

658
  @type dir_name: string
659
  @param dir_name: absolute path to a directory
660
  @type env: dict
661
  @param env: The environment to use
662
  @type reset_env: boolean
663
  @param reset_env: whether to reset or keep the default os environment
664
  @rtype: list of tuples
665
  @return: list of (name, (one of RUNDIR_STATUS), RunResult)
666

667
  """
668
  rr = []
669

    
670
  try:
671
    dir_contents = utils_io.ListVisibleFiles(dir_name)
672
  except OSError, err:
673
    logging.warning("RunParts: skipping %s (cannot list: %s)", dir_name, err)
674
    return rr
675

    
676
  for relname in sorted(dir_contents):
677
    fname = utils_io.PathJoin(dir_name, relname)
678
    if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
679
            constants.EXT_PLUGIN_MASK.match(relname) is not None):
680
      rr.append((relname, constants.RUNPARTS_SKIP, None))
681
    else:
682
      try:
683
        result = RunCmd([fname], env=env, reset_env=reset_env)
684
      except Exception, err: # pylint: disable-msg=W0703
685
        rr.append((relname, constants.RUNPARTS_ERR, str(err)))
686
      else:
687
        rr.append((relname, constants.RUNPARTS_RUN, result))
688

    
689
  return rr
690

    
691

    
692
def _GetProcStatusPath(pid):
693
  """Returns the path for a PID's proc status file.
694

695
  @type pid: int
696
  @param pid: Process ID
697
  @rtype: string
698

699
  """
700
  return "/proc/%d/status" % pid
701

    
702

    
703
def IsProcessAlive(pid):
704
  """Check if a given pid exists on the system.
705

706
  @note: zombie status is not handled, so zombie processes
707
      will be returned as alive
708
  @type pid: int
709
  @param pid: the process ID to check
710
  @rtype: boolean
711
  @return: True if the process exists
712

713
  """
714
  def _TryStat(name):
715
    try:
716
      os.stat(name)
717
      return True
718
    except EnvironmentError, err:
719
      if err.errno in (errno.ENOENT, errno.ENOTDIR):
720
        return False
721
      elif err.errno == errno.EINVAL:
722
        raise utils_retry.RetryAgain(err)
723
      raise
724

    
725
  assert isinstance(pid, int), "pid must be an integer"
726
  if pid <= 0:
727
    return False
728

    
729
  # /proc in a multiprocessor environment can have strange behaviors.
730
  # Retry the os.stat a few times until we get a good result.
731
  try:
732
    return utils_retry.Retry(_TryStat, (0.01, 1.5, 0.1), 0.5,
733
                             args=[_GetProcStatusPath(pid)])
734
  except utils_retry.RetryTimeout, err:
735
    err.RaiseInner()
736

    
737

    
738
def _ParseSigsetT(sigset):
739
  """Parse a rendered sigset_t value.
740

741
  This is the opposite of the Linux kernel's fs/proc/array.c:render_sigset_t
742
  function.
743

744
  @type sigset: string
745
  @param sigset: Rendered signal set from /proc/$pid/status
746
  @rtype: set
747
  @return: Set of all enabled signal numbers
748

749
  """
750
  result = set()
751

    
752
  signum = 0
753
  for ch in reversed(sigset):
754
    chv = int(ch, 16)
755

    
756
    # The following could be done in a loop, but it's easier to read and
757
    # understand in the unrolled form
758
    if chv & 1:
759
      result.add(signum + 1)
760
    if chv & 2:
761
      result.add(signum + 2)
762
    if chv & 4:
763
      result.add(signum + 3)
764
    if chv & 8:
765
      result.add(signum + 4)
766

    
767
    signum += 4
768

    
769
  return result
770

    
771

    
772
def _GetProcStatusField(pstatus, field):
773
  """Retrieves a field from the contents of a proc status file.
774

775
  @type pstatus: string
776
  @param pstatus: Contents of /proc/$pid/status
777
  @type field: string
778
  @param field: Name of field whose value should be returned
779
  @rtype: string
780

781
  """
782
  for line in pstatus.splitlines():
783
    parts = line.split(":", 1)
784

    
785
    if len(parts) < 2 or parts[0] != field:
786
      continue
787

    
788
    return parts[1].strip()
789

    
790
  return None
791

    
792

    
793
def IsProcessHandlingSignal(pid, signum, status_path=None):
794
  """Checks whether a process is handling a signal.
795

796
  @type pid: int
797
  @param pid: Process ID
798
  @type signum: int
799
  @param signum: Signal number
800
  @rtype: bool
801

802
  """
803
  if status_path is None:
804
    status_path = _GetProcStatusPath(pid)
805

    
806
  try:
807
    proc_status = utils_io.ReadFile(status_path)
808
  except EnvironmentError, err:
809
    # In at least one case, reading /proc/$pid/status failed with ESRCH.
810
    if err.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL, errno.ESRCH):
811
      return False
812
    raise
813

    
814
  sigcgt = _GetProcStatusField(proc_status, "SigCgt")
815
  if sigcgt is None:
816
    raise RuntimeError("%s is missing 'SigCgt' field" % status_path)
817

    
818
  # Now check whether signal is handled
819
  return signum in _ParseSigsetT(sigcgt)
820

    
821

    
822
def Daemonize(logfile):
  """Daemonize the current process.

  This detaches the current process from the controlling terminal and
  runs it in the background as a daemon.

  Only the second child returns from this function. The original process
  waits on the read end of an error pipe: it exits with status 1 after
  printing whatever was written to the pipe, or with status 0 if the pipe
  was closed without any data.

  @type logfile: str
  @param logfile: the logfile to which we should redirect stdout/stderr
  @rtype: int
  @return: the write end of the error pipe; the original process keeps
      waiting until something is written to it or it is closed

  """
  # pylint: disable-msg=W0212
  # yes, we really want os._exit

  # TODO: do another attempt to merge Daemonize and StartDaemon, or at
  # least abstract the pipe functionality between them

  # Create pipe for sending error messages
  (rpipe, wpipe) = os.pipe()

  # this might fail
  pid = os.fork()
  if (pid == 0):  # The first child.
    SetupDaemonEnv()

    # this might fail
    pid = os.fork() # Fork a second child.
    if (pid == 0):  # The second child.
      # The daemon only ever writes to the pipe
      utils_wrapper.CloseFdNoError(rpipe)
    else:
      # _exit() is used on purpose, see the note at the top of the function
      os._exit(0) # Exit parent (the first child) of the second child.
  else:
    # The original process only ever reads from the pipe
    utils_wrapper.CloseFdNoError(wpipe)
    # Wait for daemon to be started (or an error message to
    # arrive) and read up to 100 KB as an error message
    errormsg = utils_wrapper.RetryOnSignal(os.read, rpipe, 100 * 1024)
    if errormsg:
      sys.stderr.write("Error when starting daemon process: %r\n" % errormsg)
      rcode = 1
    else:
      rcode = 0
    os._exit(rcode) # Exit parent of the first child.

  # Only the second child (the daemon) gets here
  SetupDaemonFDs(logfile, None)
  return wpipe
869

    
870

    
871
def KillProcess(pid, signal_=signal.SIGTERM, timeout=30,
                waitpid=False):
  """Terminates a process given by its pid.

  The requested signal is sent first; if the process is still alive once
  the timeout has passed, SIGKILL is sent as well.

  @type pid: int
  @param pid: The PID to terminate.
  @type signal_: int
  @param signal_: The signal to send, by default SIGTERM
  @type timeout: int
  @param timeout: The timeout after which, if the process is still alive,
                  a SIGKILL will be sent. If not positive, no such checking
                  will be done
  @type waitpid: boolean
  @param waitpid: If true, we should waitpid on this process after
      sending signals, since it's our own child and otherwise it
      would remain as zombie
  @raise errors.ProgrammerError: if the pid is not positive

  """
  def _SendSignal(target, signum, reap):
    """Sends a signal and, if requested, reaps the process afterwards"""
    delivered = utils_wrapper.IgnoreProcessNotFound(os.kill, target, signum)
    if delivered and reap:
      try:
        os.waitpid(target, os.WNOHANG)
      except OSError:
        pass

  def _CheckProcess():
    """Retry helper, returns once the process is gone or was reaped"""
    if not IsProcessAlive(pid):
      return

    try:
      (result_pid, _) = os.waitpid(pid, os.WNOHANG)
    except OSError:
      raise utils_retry.RetryAgain()

    if result_pid <= 0:
      raise utils_retry.RetryAgain()

  if pid <= 0:
    # kill with pid=0 == suicide
    raise errors.ProgrammerError("Invalid pid given '%s'" % pid)

  if not IsProcessAlive(pid):
    return

  _SendSignal(pid, signal_, waitpid)

  if timeout <= 0:
    return

  try:
    # Wait up to $timeout seconds
    utils_retry.Retry(_CheckProcess, (0.01, 1.5, 0.1), timeout)
  except utils_retry.RetryTimeout:
    pass

  if IsProcessAlive(pid):
    # Kill process if it's still alive
    _SendSignal(pid, signal.SIGKILL, waitpid)
932

    
933

    
934
def RunInSeparateProcess(fn, *args):
  """Calls a function in a forked child process.

  The result travels back as the child's exit code, hence only boolean
  return values are supported.

  @type fn: callable
  @param fn: Function to be called
  @param args: positional arguments passed to C{fn}
  @rtype: bool
  @return: Function's result
  @raise errors.GenericError: if the child was terminated by a signal or
      exited with a code other than 0 or 1 (e.g. because C{fn} raised an
      exception)

  """
  pid = os.fork()
  if pid == 0:
    # Child process
    try:
      # In case the function uses temporary files
      utils_wrapper.ResetTempfileModule()

      # Call function
      result = int(bool(fn(*args)))
      assert result in (0, 1)
    except: # pylint: disable-msg=W0702
      logging.exception("Error while calling function in separate process")
      # 0 and 1 are reserved for the return value
      result = 33

    os._exit(result) # pylint: disable-msg=W0212

  # Parent process

  # Avoid zombies and check exit code
  (_, status) = os.waitpid(pid, 0)

  exitcode = None
  signum = None
  if os.WIFSIGNALED(status):
    signum = os.WTERMSIG(status)
  else:
    exitcode = os.WEXITSTATUS(status)

  if signum is not None or exitcode not in (0, 1):
    raise errors.GenericError("Child program failed (code=%s, signal=%s)" %
                              (exitcode, signum))

  return bool(exitcode)
979

    
980

    
981
def CloseFDs(noclose_fds=None):
  """Closes file descriptors.

  All file descriptors from 3 up to the hard limit on open files are
  closed, i.e. stdin, stdout and stderr are left alone.

  @type noclose_fds: list or None
  @param noclose_fds: if given, it denotes a list of file descriptor
      that should not be closed

  """
  # Used when the system can't tell how many descriptors there can be
  fallback_maxfd = 1024

  sysconf_maxfd = fallback_maxfd
  if "SC_OPEN_MAX" in os.sysconf_names:
    try:
      sysconf_maxfd = os.sysconf("SC_OPEN_MAX")
    except OSError:
      sysconf_maxfd = fallback_maxfd
    else:
      if sysconf_maxfd < 0:
        sysconf_maxfd = fallback_maxfd

  # The hard limit is authoritative unless it's unlimited
  maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
  if maxfd == resource.RLIM_INFINITY:
    maxfd = sysconf_maxfd

  keep = noclose_fds or []

  # Iterate through and close all file descriptors (except the standard ones)
  for fd in range(3, maxfd):
    if fd not in keep:
      utils_wrapper.CloseFdNoError(fd)