Statistics
| Branch: | Tag: | Revision:

root / lib / utils.py @ b9bddb6b

History | View | Annotate | Download (36.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Ganeti small utilities
23

24
"""
25

    
26

    
27
import sys
28
import os
29
import sha
30
import time
31
import subprocess
32
import re
33
import socket
34
import tempfile
35
import shutil
36
import errno
37
import pwd
38
import itertools
39
import select
40
import fcntl
41
import resource
42
import logging
43
import signal
44

    
45
from cStringIO import StringIO
46

    
47
from ganeti import errors
48
from ganeti import constants
49

    
50

    
51
_locksheld = []
52
_re_shell_unquoted = re.compile('^[-.,=:/_+@A-Za-z0-9]+$')
53

    
54
debug = False
55
debug_locks = False
56
no_fork = False
57

    
58

    
59
class RunResult(object):
  """Outcome of running an external program.

  Instance variables:
    cmd: the command that was run, as passed to the constructor
    exit_code: the exit code of the program, or None (if the program
               didn't exit())
    signal: numeric signal that terminated the program, or None (if
            the program wasn't terminated by a signal)
    stdout: the standard output of the program
    stderr: the standard error of the program
    failed: True if the program was terminated by a signal or exited
            with a non-zero exit code
    fail_reason: human-readable description of how the program ended
    output: read-only property, stdout followed by stderr

  """
  __slots__ = ["exit_code", "signal", "stdout", "stderr",
               "failed", "fail_reason", "cmd"]

  def __init__(self, exit_code, signal_, stdout, stderr, cmd):
    killed = signal_ is not None

    self.cmd = cmd
    self.exit_code = exit_code
    self.signal = signal_
    self.stdout = stdout
    self.stderr = stderr
    self.failed = killed or exit_code != 0

    if killed:
      reason = "terminated by signal %s" % signal_
    elif exit_code is not None:
      reason = "exited with exit code %s" % exit_code
    else:
      reason = "unable to determine termination reason"
    self.fail_reason = reason

    # Failures are only logged at debug level; callers decide what to do.
    if self.failed:
      logging.debug("Command '%s' failed (%s); output: %s",
                    self.cmd, self.fail_reason, self.output)

  def _GetOutput(self):
    """Returns stdout and stderr concatenated, in that order.

    """
    return self.stdout + self.stderr

  output = property(fget=_GetOutput, doc="Return full output")
104

    
105

    
106
def RunCmd(cmd, env=None):
  """Execute a (shell) command and collect its output.

  The command should not read from its standard input, as it will be
  closed right after the child has been started.

  @param cmd: Command to run; a list is executed directly, anything
              else is handed to the shell
  @type  cmd: string or list
  @param env: Additional environment, applied on top of a copy of
              os.environ with LC_ALL forced to "C"
  @type env: dict
  @return: `RunResult` instance
  @rtype: RunResult
  @raise errors.ProgrammerError: if the module-level no_fork flag is set

  """
  if no_fork:
    raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled")

  use_shell = not isinstance(cmd, list)
  if use_shell:
    strcmd = cmd
  else:
    cmd = [str(val) for val in cmd]
    strcmd = " ".join(cmd)
  logging.debug("RunCmd '%s'", strcmd)

  cmd_env = os.environ.copy()
  cmd_env["LC_ALL"] = "C"
  if env is not None:
    cmd_env.update(env)

  poller = select.poll()
  child = subprocess.Popen(cmd, shell=use_shell,
                           stderr=subprocess.PIPE,
                           stdout=subprocess.PIPE,
                           stdin=subprocess.PIPE,
                           close_fds=True, env=cmd_env)
  child.stdin.close()

  out = StringIO()
  err = StringIO()

  # fd -> (buffer collecting the data, pipe to read from)
  fdmap = {}
  for (pipe, sink) in ((child.stdout, out), (child.stderr, err)):
    poller.register(pipe, select.POLLIN)
    fdmap[pipe.fileno()] = (sink, pipe)

  # Non-blocking pipes, so that read() returns whatever is available
  for fd in fdmap:
    flags = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)

  readable = select.POLLIN | select.POLLPRI
  finished = select.POLLNVAL | select.POLLHUP | select.POLLERR

  while fdmap:
    for fd, event in poller.poll():
      if event & readable:
        (sink, pipe) = fdmap[fd]
        data = pipe.read()
        # no data from read signifies EOF (the same as POLLHUP)
        if not data:
          poller.unregister(fd)
          del fdmap[fd]
          continue
        sink.write(data)
      if event & finished:
        poller.unregister(fd)
        del fdmap[fd]

  # A negative status from wait() is the negated number of the signal
  # which terminated the child
  status = child.wait()
  if status < 0:
    exitcode, signal_ = None, -status
  else:
    exitcode, signal_ = status, None

  return RunResult(exitcode, signal_, out.getvalue(), err.getvalue(), strcmd)
184

    
185

    
186
def RemoveFile(filename):
  """Remove a file, tolerating some errors.

  A missing file (ENOENT) or a directory (EISDIR) is silently ignored;
  every other OSError is passed on to the caller.

  """
  ignorable = (errno.ENOENT, errno.EISDIR)
  try:
    os.unlink(filename)
  except OSError:
    # The exception is fetched via sys.exc_info() so that the clause does
    # not depend on interpreter-specific "except ..., name" syntax
    if sys.exc_info()[1].errno in ignorable:
      return
    raise
198

    
199

    
200
def _FingerprintFile(filename):
201
  """Compute the fingerprint of a file.
202

203
  If the file does not exist, a None will be returned
204
  instead.
205

206
  Args:
207
    filename - Filename (str)
208

209
  """
210
  if not (os.path.exists(filename) and os.path.isfile(filename)):
211
    return None
212

    
213
  f = open(filename)
214

    
215
  fp = sha.sha()
216
  while True:
217
    data = f.read(4096)
218
    if not data:
219
      break
220

    
221
    fp.update(data)
222

    
223
  return fp.hexdigest()
224

    
225

    
226
def FingerprintFiles(files):
  """Compute fingerprints for a list of files.

  Args:
    files - array of filenames.  ( [str, ...] )

  Return value:
    dictionary of filename: fingerprint, containing only the files for
    which a fingerprint could be computed

  """
  computed = [(name, _FingerprintFile(name)) for name in files]
  # Entries without a fingerprint (missing or non-regular files) are dropped
  return dict([(name, cksum) for (name, cksum) in computed if cksum])
244

    
245

    
246
def CheckDict(target, template, logname=None):
  """Fill in keys missing from a dictionary.

  Every key of `template` which is absent from `target` is copied over,
  together with its template value. `target` is modified in place.

  Args:
    target   - the dictionary to check (and complete)
    template - template dictionary
    logname  - a caller-chosen string to identify the log entry;
               if None (or empty), no logging will be done

  Returns value:
    None

  """
  missing = [key for key in template if key not in target]
  for key in missing:
    target[key] = template[key]

  if missing and logname:
    logging.warning('%s missing keys %s', logname, ', '.join(missing))
271

    
272

    
273
def IsProcessAlive(pid):
  """Check if a given pid exists on the system.

  The check reads /proc/<pid>/status.

  Returns: true or false, depending on if the pid exists or not

  Remarks: zombie processes are treated as not alive, and a pid <= 0
  makes the function return False. Errors while opening the status
  file, other than the file not existing, are raised as IOError.

  """
  if pid <= 0:
    return False

  try:
    f = open("/proc/%d/status" % pid)
  except IOError:
    if sys.exc_info()[1].errno in (errno.ENOENT, errno.ENOTDIR):
      return False
    # Anything else used to fall through and fail on the unbound file
    # object; report the real error instead
    raise

  try:
    # Look the "State:" line up by name; its position in the file is not
    # the same on all kernels
    for line in f:
      if line.startswith("State:"):
        fields = line.split()
        return not (len(fields) > 1 and fields[1] == "Z")
    # No state information: the process exists, so treat it as alive
    return True
  finally:
    f.close()
302

    
303

    
304
def ReadPidFile(pidfile):
  """Read the pid from a file.

  @param pidfile: Path to a file containing the pid to be checked
  @type  pidfile: string (filename)
  @return: The process id, if the file exists and contains a valid PID,
           otherwise 0
  @rtype: int

  """
  try:
    pf = open(pidfile, 'r')
  except EnvironmentError:
    # A missing pid file is an expected situation, anything else is logged
    if sys.exc_info()[1].errno != errno.ENOENT:
      logging.exception("Can't read pid file?!")
    return 0

  # The handle is closed on every path (it used to be left open)
  try:
    try:
      pid = int(pf.read())
    except ValueError:
      logging.info("Can't parse pid file contents", exc_info=True)
      return 0
  finally:
    pf.close()

  return pid
328

    
329

    
330
def MatchNameComponent(key, name_list):
  """Try to match a (possibly shortened) name against a list.

  A key like test1 is looked up in a list such as
  ['test1.example.com', 'test2.example.com', ...]. The key matches a
  name when it is equal to it or is a prefix ending right before a dot,
  so 'test1' and 'test1.example' match, but 'test1.ex' doesn't. More
  than one match counts as no match at all (e.g. 'test1' against
  ['test1.example.com', 'test1.example.org']).

  Args:
    key: the name to be searched
    name_list: the list of strings against which to search the key

  Returns:
    None if there is no match *or* if there are multiple matches
    otherwise the element from the list which matches

  """
  pattern = re.compile(r"^%s(\..*)?$" % re.escape(key))
  candidates = [name for name in name_list if pattern.match(name)]
  if len(candidates) == 1:
    return candidates[0]
  return None
353

    
354

    
355
class HostInfo:
  """Class implementing resolver and hostname functionality

  Instance attributes (all set by the constructor):
    query: the name which was asked for
    name, aliases, ipaddrs: the lookup result for that name
    ip: the first entry of ipaddrs

  """
  def __init__(self, name=None):
    """Initialize the host name object.

    If the name argument is not passed, it will use this system's
    name. Resolution failures are raised as errors.ResolverError.

    """
    if name is None:
      name = self.SysName()

    self.query = name
    self.name, self.aliases, self.ipaddrs = self.LookupHostname(name)
    self.ip = self.ipaddrs[0]

  def ShortName(self):
    """Returns the hostname without domain.

    """
    return self.name.split('.')[0]

  @staticmethod
  def SysName():
    """Return the current system's name.

    This is simply a wrapper over socket.gethostname()

    """
    return socket.gethostname()

  @staticmethod
  def LookupHostname(hostname):
    """Look up hostname

    Args:
      hostname: hostname to look up

    Returns:
      a tuple (name, aliases, ipaddrs) as returned by socket.gethostbyname_ex
      in case of errors in resolving, we raise a ResolverError

    """
    try:
      result = socket.gethostbyname_ex(hostname)
    except (socket.gaierror, socket.herror):
      # gethostbyname_ex() reports failures through either exception type;
      # both carry (code, message) in their args
      err = sys.exc_info()[1]
      raise errors.ResolverError(hostname, err.args[0], err.args[1])

    return result
407

    
408

    
409
def ListVolumeGroups():
  """List volume groups and their size

  The data comes from the output of the "vgs" command; lines which
  cannot be parsed are logged and skipped.

  Returns:
     Dictionary with keys volume name and values the size of the volume
     (the size reported by vgs with "--units m", truncated to an
     integer); empty if the command failed

  """
  result = RunCmd("vgs --noheadings --units m --nosuffix -o name,size")
  if result.failed:
    return {}

  volume_groups = {}
  for line in result.stdout.splitlines():
    try:
      name, size = line.split()
      size = int(float(size))
    except (IndexError, ValueError):
      logging.error("Invalid output from vgs (%s): %s",
                    sys.exc_info()[1], line)
      continue

    volume_groups[name] = size

  return volume_groups
433

    
434

    
435
def BridgeExists(bridge):
  """Check whether the given bridge exists in the system

  The check is done by looking for the "bridge" directory of the
  interface under /sys/class/net.

  Returns:
     True if it does, false otherwise.

  """
  bridge_dir = "/sys/class/net/%s/bridge" % bridge
  return os.path.isdir(bridge_dir)
443

    
444

    
445
def NiceSort(name_list):
  """Sort a list of strings based on digit and non-digit groupings.

  Given a list of names ['a1', 'a10', 'a11', 'a2'] this function will
  sort the list in the logical order ['a1', 'a2', 'a10', 'a11'].

  The sort algorithm breaks each name in groups of either only-digits
  or no-digits. Only the first eight such groups are considered; names
  equal in all of them are ordered by plain string comparison.

  Return value
    - a copy of the list sorted according to our algorithm

  """
  # All eight groups are optional, so that the pattern also matches the
  # empty string (a mandatory first group made match() return None there)
  _SORTER_RE = re.compile("^" + r"(\D+|\d+)?" * 8)
  _SORTER_NODIGIT = re.compile(r"^\D*$")

  def _TryInt(val):
    """Converts digit groups to integers, leaves everything else alone."""
    if val is None or _SORTER_NODIGIT.match(val):
      return val
    return int(val)

  # Decorate with the split-up name, sort, undecorate
  to_sort = [([_TryInt(grp) for grp in _SORTER_RE.match(name).groups()], name)
             for name in name_list]
  to_sort.sort()
  return [name for (_, name) in to_sort]
477

    
478

    
479
def TryConvert(fn, val):
  """Try to convert a value ignoring errors.

  Returns fn(val), unless that call raises ValueError or TypeError, in
  which case the original value is returned unchanged. Any other
  exception is propagated to the caller.

  """
  try:
    return fn(val)
  except (ValueError, TypeError):
    return val
493

    
494

    
495
def IsValidIP(ip):
  """Verifies the syntax of an IP address.

  Only the syntax is checked (four dot-separated groups of one to three
  digits, without leading zeros), not the ip range, class or anything
  else.

  Returns the regular expression match object for a valid address and
  None otherwise.

  """
  octet = r"(0|[1-9]\d{0,2})"
  pattern = "^%s$" % r"\.".join([octet] * 4)
  return re.match(pattern, ip)
504

    
505

    
506
def IsValidShellParam(word):
  """Verifies if the given word is safe from the shell's p.o.v.

  This means that we can pass this to a command via the shell and be
  sure that it doesn't alter the command line and is passed as such to
  the actual command.

  Note that we are overly restrictive here, in order to be on the safe
  side.

  Returns True for a non-empty word made up only of the characters
  -a-zA-Z0-9._+/:%@ and False otherwise.

  """
  # \Z instead of $: "$" also matches just before a trailing newline, and
  # a newline would terminate the command when interpreted by the shell
  return bool(re.match(r"^[-a-zA-Z0-9._+/:%@]+\Z", word))
518

    
519

    
520
def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  Every argument is validated with IsValidShellParam (i.e. it must not
  contain shell metacharacters); the first invalid one aborts with an
  errors.ProgrammerError. If everything is ok, the result of
  template % args is returned.

  """
  for candidate in args:
    if IsValidShellParam(candidate):
      continue
    raise errors.ProgrammerError("Shell argument '%s' contains invalid"
                                 " characters" % candidate)
  return template % args
534

    
535

    
536
def FormatUnit(value):
  """Formats an incoming number of MiB with the appropriate unit.

  Values below 1024 are shown as whole "M", values below 1024 * 1024
  as "G" and everything else as "T", the latter two with one decimal.

  Value needs to be passed as a numeric type. Return value is always a
  string.

  """
  if value < 1024:
    return "%dM" % round(value, 0)

  if value < (1024 * 1024):
    gigs = float(value) / 1024
    return "%0.1fG" % round(gigs, 1)

  teras = float(value) / 1024 / 1024
  return "%0.1fT" % round(teras, 1)
550

    
551

    
552
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format NUMBER+ [DOT NUMBER+] SPACE* [UNIT]. If no unit
  is specified, it defaults to MiB. Recognised units (case-insensitive)
  are m/mb/mib, g/gb/gib and t/tb/tib.

  Return value is always an int in MiB, rounded up to the next multiple
  of 4. Malformed input or an unknown unit raises errors.UnitParseError.

  """
  m = re.match(r'^([.\d]+)\s*([a-zA-Z]+)?$', input_string)
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  # The pattern lets strings such as "1.2.3" or "." through; make sure
  # those are reported like any other malformed input
  try:
    value = float(number)
  except ValueError:
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = 'm'

  if lcunit in ('m', 'mb', 'mib'):
    # Value already in MiB
    pass

  elif lcunit in ('g', 'gb', 'gib'):
    value *= 1024

  elif lcunit in ('t', 'tb', 'tib'):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
594

    
595

    
596
def AddAuthorizedKey(file_name, key):
  """Adds an SSH public key to an authorized_keys file.

  The key is appended on a line of its own, unless a line with the same
  whitespace-separated fields is already present, in which case the
  file is left alone. The file is created if it doesn't exist.

  Args:
    file_name: Path to authorized_keys file
    key: String containing key
  """
  key_fields = key.split()

  f = open(file_name, 'a+')
  try:
    # Where reading starts on a file opened in "a+" mode depends on the
    # platform; rewind explicitly so that the whole file is scanned
    f.seek(0)

    nl = True
    for line in f:
      # Ignore whitespace changes
      if line.split() == key_fields:
        break
      nl = line.endswith('\n')
    else:
      # Key not found; make sure it starts on a fresh line
      if not nl:
        f.write("\n")
      f.write(key.rstrip('\r\n'))
      f.write("\n")
      f.flush()
  finally:
    f.close()
621

    
622

    
623
def RemoveAuthorizedKey(file_name, key):
  """Removes an SSH public key from an authorized_keys file.

  All lines whose whitespace-separated fields equal those of the key
  are dropped. The new contents are written to a temporary file in the
  same directory, which is then renamed over the original; on errors
  the temporary file is removed and the exception re-raised.

  Args:
    file_name: Path to authorized_keys file
    key: String containing key
  """
  wanted = key.split()

  tmp_fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(file_name))
  try:
    new_file = os.fdopen(tmp_fd, 'w')
    try:
      old_file = open(file_name, 'r')
      try:
        # Ignore whitespace changes while comparing lines
        kept = [line for line in old_file if line.split() != wanted]
        new_file.writelines(kept)

        new_file.flush()
        os.rename(tmp_path, file_name)
      finally:
        old_file.close()
    finally:
      new_file.close()
  except:
    RemoveFile(tmp_path)
    raise
652

    
653

    
654
def SetEtcHostsEntry(file_name, ip, hostname, aliases):
  """Sets the name of an IP address and hostname in /etc/hosts.

  Existing lines for the given IP address are dropped and a new line
  with the address, the hostname and the aliases is appended. The file
  is rewritten through a temporary file in the same directory, which is
  renamed over the original; on errors the temporary file is removed.

  Args:
    file_name: path of the hosts file to modify
    ip: IP address of the entry
    hostname: primary name of the entry
    aliases: list of additional names; duplicates and the hostname
             itself are left out

  """
  # Ensure aliases are unique
  aliases = UniqueSequence([hostname] + aliases)[1:]

  new_entry = "%s\t%s" % (ip, hostname)
  if aliases:
    new_entry += " %s" % ' '.join(aliases)
  new_entry += '\n'

  tmp_fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(file_name))
  try:
    out = os.fdopen(tmp_fd, 'w')
    try:
      hosts = open(file_name, 'r')
      try:
        for line in hosts:
          fields = line.split()
          stale = (fields and not fields[0].startswith('#') and
                   ip == fields[0])
          if not stale:
            out.write(line)

        out.write(new_entry)

        out.flush()
        os.fsync(out)
        os.rename(tmp_path, file_name)
      finally:
        hosts.close()
    finally:
      out.close()
  except:
    RemoveFile(tmp_path)
    raise
689

    
690

    
691
def AddHostToEtcHosts(hostname):
  """Wrapper around SetEtcHostsEntry.

  Resolves the hostname through HostInfo and records its IP address and
  name in constants.ETC_HOSTS, with the short name as the only alias.

  """
  info = HostInfo(name=hostname)
  short_name = info.ShortName()
  SetEtcHostsEntry(constants.ETC_HOSTS, info.ip, info.name, [short_name])
697

    
698

    
699
def RemoveEtcHostsEntry(file_name, hostname):
  """Removes a hostname from /etc/hosts.

  The hostname is taken out of every non-comment entry listing it; IP
  addresses left without names are removed from the file. The file is
  rewritten through a temporary file in the same directory, which is
  renamed over the original; on errors the temporary file is removed.
  """
  tmp_fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(file_name))
  try:
    out = os.fdopen(tmp_fd, 'w')
    try:
      hosts = open(file_name, 'r')
      try:
        for line in hosts:
          fields = line.split()
          is_entry = len(fields) > 1 and not fields[0].startswith('#')
          if not is_entry or hostname not in fields[1:]:
            # Nothing to do with us, copy verbatim
            out.write(line)
            continue

          remaining = [name for name in fields[1:] if name != hostname]
          if remaining:
            out.write("%s %s\n" % (fields[0], ' '.join(remaining)))

        out.flush()
        os.fsync(out)
        os.rename(tmp_path, file_name)
      finally:
        hosts.close()
    finally:
      out.close()
  except:
    RemoveFile(tmp_path)
    raise
733

    
734

    
735
def RemoveHostFromEtcHosts(hostname):
  """Wrapper around RemoveEtcHostsEntry.

  Resolves the hostname through HostInfo and removes first its name and
  then its short name from constants.ETC_HOSTS.

  """
  info = HostInfo(name=hostname)
  for name in (info.name, info.ShortName()):
    RemoveEtcHostsEntry(constants.ETC_HOSTS, name)
742

    
743

    
744
def CreateBackup(file_name):
  """Creates a backup of a file.

  The copy is created next to the original, under a unique name starting
  with "<basename>.backup-<current unix time>.".

  Returns: the path to the newly created backup file.

  Raises errors.ProgrammerError if file_name is not a regular file.

  """
  if not os.path.isfile(file_name):
    raise errors.ProgrammerError("Can't make a backup of a non-file '%s'" %
                                 file_name)

  (dir_name, base_name) = os.path.split(file_name)
  prefix = '%s.backup-%d.' % (base_name, int(time.time()))

  source = open(file_name, 'rb')
  try:
    (backup_fd, backup_name) = tempfile.mkstemp(prefix=prefix, dir=dir_name)
    target = os.fdopen(backup_fd, 'wb')
    try:
      shutil.copyfileobj(source, target)
    finally:
      target.close()
  finally:
    source.close()

  return backup_name
769

    
770

    
771
def ShellQuote(value):
  """Quotes shell argument according to POSIX.

  Values accepted by the module-level _re_shell_unquoted pattern are
  returned as they are; everything else is wrapped in single quotes,
  with embedded single quotes replaced by '\\''.

  """
  if not _re_shell_unquoted.match(value):
    return "'%s'" % value.replace("'", "'\\''")
  return value
779

    
780

    
781
def ShellQuoteArgs(args):
  """Quotes all given shell arguments and concatenates using spaces.

  Each argument is quoted on its own with ShellQuote.

  """
  quoted = [ShellQuote(arg) for arg in args]
  return ' '.join(quoted)
786

    
787

    
788
def TcpPing(target, port, timeout=10, live_port_needed=False, source=None):
789
  """Simple ping implementation using TCP connect(2).
790

791
  Try to do a TCP connect(2) from an optional source IP to the
792
  specified target IP and the specified target port. If the optional
793
  parameter live_port_needed is set to true, requires the remote end
794
  to accept the connection. The timeout is specified in seconds and
795
  defaults to 10 seconds. If the source optional argument is not
796
  passed, the source address selection is left to the kernel,
797
  otherwise we try to connect using the passed address (failures to
798
  bind other than EADDRNOTAVAIL will be ignored).
799

800
  """
801
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
802

    
803
  sucess = False
804

    
805
  if source is not None:
806
    try:
807
      sock.bind((source, 0))
808
    except socket.error, (errcode, errstring):
809
      if errcode == errno.EADDRNOTAVAIL:
810
        success = False
811

    
812
  sock.settimeout(timeout)
813

    
814
  try:
815
    sock.connect((target, port))
816
    sock.close()
817
    success = True
818
  except socket.timeout:
819
    success = False
820
  except socket.error, (errcode, errstring):
821
    success = (not live_port_needed) and (errcode == errno.ECONNREFUSED)
822

    
823
  return success
824

    
825

    
826
def ListVisibleFiles(path):
  """Returns a list of all visible files in a directory.

  Visible means that the name doesn't start with a dot; the returned
  names are sorted.

  """
  visible = [name for name in os.listdir(path) if name[:1] != "."]
  return sorted(visible)
833

    
834

    
835
def GetHomeDir(user, default=None):
  """Try to get the homedir of the given user.

  The user can be passed either as a string (denoting the name) or as
  an integer (denoting the user id). If the user is not found, the
  'default' argument is returned, which defaults to None. Any other
  type for user raises errors.ProgrammerError.

  """
  try:
    if isinstance(user, basestring):
      lookup = pwd.getpwnam
    elif isinstance(user, (int, long)):
      lookup = pwd.getpwuid
    else:
      raise errors.ProgrammerError("Invalid type passed to GetHomeDir (%s)" %
                                   type(user))
    # The pwd functions raise KeyError for unknown users
    entry = lookup(user)
  except KeyError:
    return default
  return entry.pw_dir
854

    
855

    
856
def NewUUID():
  """Returns a random UUID.

  The value is read from /proc/sys/kernel/random/uuid, with the trailing
  newline removed.

  """
  uuid_file = open("/proc/sys/kernel/random/uuid", "r")
  try:
    raw = uuid_file.read(128)
  finally:
    uuid_file.close()
  return raw.rstrip("\n")
865

    
866

    
867
def GenerateSecret():
  """Generates a random secret.

  This will read 64 bytes from os.urandom() and return their SHA-1 hex
  digest (so that it can be used where an ASCII string is needed).

  """
  # The standalone "sha" module is deprecated in favour of hashlib; use
  # hashlib where it exists and fall back to the module-level import.
  try:
    from hashlib import sha1 as new_sha
  except ImportError:
    new_sha = sha.new

  return new_sha(os.urandom(64)).hexdigest()
875

    
876

    
877
def ReadFile(file_name, size=None):
  """Reads a file and returns its contents.

  @type size: None or int
  @param size: Read at most size bytes; None reads the whole file

  """
  handle = open(file_name, "r")
  try:
    if size is not None:
      return handle.read(size)
    return handle.read()
  finally:
    handle.close()
892

    
893

    
894
def WriteFile(file_name, fn=None, data=None,
              mode=None, uid=-1, gid=-1,
              atime=None, mtime=None, close=True,
              dry_run=False, backup=False,
              prewrite=None, postwrite=None):
  """(Over)write a file atomically.

  The file_name and either fn (a function taking one argument, the
  file descriptor, and which should write the data to it) or data (the
  contents of the file) must be passed. The other arguments are
  optional and allow setting the file mode, owner and group, and the
  mtime/atime of the file.

  The contents are written to a temporary file in the directory of the
  target, which is then renamed over the target.

  If the function doesn't raise an exception, it has succeeded and the
  target file has the new contents. If the function has raised an
  exception, an existing target file should be unmodified, the
  temporary file is removed and its file descriptor is closed
  (regardless of the "close" parameter).

  Args:
    file_name: New filename (must be an absolute path)
    fn: Content writing function, called with file descriptor as parameter
    data: Content as string
    mode: File mode
    uid: Owner
    gid: Group
    atime: Access time
    mtime: Modification time
    close: Whether to close file after writing it
    dry_run: If true, do everything except replacing the target file
    backup: Whether to back up an existing target with CreateBackup first
            (not done for dry runs)
    prewrite: Function object called before writing content
    postwrite: Function object called after writing content

  Returns:
    None if "close" parameter evaluates to True, otherwise file descriptor.

  Raises:
    errors.ProgrammerError for a relative path, if not exactly one of fn
    and data is given, or if only one of atime and mtime is given

  """
  if not os.path.isabs(file_name):
    raise errors.ProgrammerError("Path passed to WriteFile is not"
                                 " absolute: '%s'" % file_name)

  if [fn, data].count(None) != 1:
    raise errors.ProgrammerError("fn or data required")

  if [atime, mtime].count(None) == 1:
    raise errors.ProgrammerError("Both atime and mtime must be either"
                                 " set or None")

  if backup and not dry_run and os.path.isfile(file_name):
    CreateBackup(file_name)

  dir_name, base_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', base_name, dir_name)
  # Until everything has succeeded the descriptor is always closed, so that
  # a failure can't leak it to a caller who never gets to see it
  do_close = True
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
  try:
    if uid != -1 or gid != -1:
      os.chown(new_name, uid, gid)
    if mode:
      os.chmod(new_name, mode)
    if callable(prewrite):
      prewrite(fd)
    if data is not None:
      os.write(fd, data)
    else:
      fn(fd)
    if callable(postwrite):
      postwrite(fd)
    os.fsync(fd)
    if atime is not None and mtime is not None:
      os.utime(new_name, (atime, mtime))
    if not dry_run:
      os.rename(new_name, file_name)
    do_close = close
  finally:
    if do_close:
      os.close(fd)
      result = None
    else:
      result = fd
    # After a successful rename the temporary name is gone already, which
    # RemoveFile ignores
    RemoveFile(new_name)

  return result
974

    
975

    
976
def FirstFree(seq, base=0):
  """Returns the first non-existing integer from seq.

  The seq argument should be a sorted list of positive integers. The
  first time the index of an element (plus base) is smaller than the
  element value, that index (plus base) will be returned. If there is
  no such gap, None is returned.

  The base argument is used to start at a different offset,
  i.e. [3, 4, 6] with base=3 will return 5.

  Example: [0, 1, 3] will return 2.

  """
  for idx, elem in enumerate(seq):
    assert elem >= base, "Passed element is lower than base offset"
    if elem > idx + base:
      # idx + base is not used
      return idx + base
  return None
995

    
996

    
997
def all(seq, pred=bool):
  "Returns True if pred(x) is True for every element in the iterable"
  if pred is None:
    # a None predicate means testing the elements themselves
    pred = bool
  for elem in seq:
    if not pred(elem):
      return False
  return True
1002

    
1003

    
1004
def any(seq, pred=bool):
  "Returns True if pred(x) is True for at least one element in the iterable"
  if pred is None:
    # a None predicate means testing the elements themselves
    pred = bool
  for elem in seq:
    if pred(elem):
      return True
  return False
1009

    
1010

    
1011
def UniqueSequence(seq):
  """Returns a list with unique elements.

  Only the first occurrence of each element is kept; element order is
  preserved.
  """
  seen = set()
  unique = []
  for item in seq:
    if item in seen:
      continue
    seen.add(item)
    unique.append(item)
  return unique
1018

    
1019

    
1020
def IsValidMac(mac):
  """Predicate to check if a MAC address is valid.

  Checks whether the supplied MAC address is formally correct: six
  groups of two lowercase hexadecimal digits, separated by colons. No
  other format is accepted.
  """
  # Written as "group, then five times colon + group" and anchored with \Z,
  # so that neither a trailing colon nor a trailing newline is accepted
  mac_check = re.compile(r"^[0-9a-f]{2}(:[0-9a-f]{2}){5}\Z")
  return mac_check.match(mac) is not None
1028

    
1029

    
1030
def TestDelay(duration):
  """Sleep for a fixed amount of time.

  Returns False, without sleeping, for a negative duration and True
  after having slept otherwise.

  """
  if not duration < 0:
    time.sleep(duration)
    return True
  return False
1038

    
1039

    
1040
def Daemonize(logfile, noclose_fds=None):
  """Daemonize the current process.

  This detaches the current process from the controlling terminal and
  runs it in the background as a daemon. The process forks twice; the
  calling process and the intermediate child both exit with status 0,
  so only the second child ever returns from this function.

  In the daemon all file descriptors except those listed in
  noclose_fds are closed, then the log file is opened and made the
  standard output and standard error. Unless descriptor 0 was listed
  in noclose_fds, the log file also takes the place of standard input.

  @type logfile: str
  @param logfile: path of the log file; it is opened for appending and
                  created with mode 0600 if it does not exist
  @type noclose_fds: None or container of int
  @param noclose_fds: file descriptors which must be kept open
  @rtype: int
  @return: always 0

  """
  import stat

  # Files created by the daemon get no group/other permissions (077)
  UMASK = stat.S_IRWXG | stat.S_IRWXO
  # The log file is readable and writable by its owner only (0600)
  LOGFILE_MODE = stat.S_IRUSR | stat.S_IWUSR
  WORKDIR = "/"
  # Default maximum for the number of available file descriptors; only
  # used below when the hard RLIMIT_NOFILE limit is unlimited.
  if 'SC_OPEN_MAX' in os.sysconf_names:
    try:
      MAXFD = os.sysconf('SC_OPEN_MAX')
      if MAXFD < 0:
        MAXFD = 1024
    except OSError:
      MAXFD = 1024
  else:
    MAXFD = 1024

  # this might fail
  pid = os.fork()
  if (pid == 0):  # The first child.
    # Become a session leader, which drops the controlling terminal
    os.setsid()
    # this might fail
    pid = os.fork() # Fork a second child.
    if (pid == 0):  # The second child.
      os.chdir(WORKDIR)
      os.umask(UMASK)
    else:
      # os._exit() terminates immediately, without running the cleanup
      # handlers this process inherited from its parent
      os._exit(0) # Exit parent (the first child) of the second child.
  else:
    os._exit(0) # Exit parent of the first child.
  maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
  if (maxfd == resource.RLIM_INFINITY):
    maxfd = MAXFD

  # Iterate through and close all file descriptors.
  for fd in range(0, maxfd):
    if noclose_fds and fd in noclose_fds:
      continue
    try:
      os.close(fd)
    except OSError: # ERROR, fd wasn't open to begin with (ignored)
      pass
  # os.open() returns the lowest free descriptor. That is 0 when
  # descriptor 0 was closed above, but not when the caller asked to keep
  # it open, so the descriptor actually returned must be used.
  log_fd = os.open(logfile, os.O_RDWR|os.O_CREAT|os.O_APPEND, LOGFILE_MODE)
  # Duplicate the log file to standard output and standard error.
  os.dup2(log_fd, 1)     # standard output (1)
  os.dup2(log_fd, 2)     # standard error (2)
  if log_fd > 2:
    # Only the copies on 1 and 2 are needed
    os.close(log_fd)
  return 0
1091

    
1092

    
1093
def DaemonPidFileName(name):
  """Compute the path of the pid file belonging to a ganeti daemon.

  @param name: the daemon name
  @return: the path of "<name>.pid" inside constants.RUN_GANETI_DIR

  """
  pid_basename = "%s.pid" % name
  return os.path.join(constants.RUN_GANETI_DIR, pid_basename)
1098

    
1099

    
1100
def WritePidFile(name):
  """Write the pidfile of the current process.

  The file will be written to constants.RUN_GANETI_DIR/name.pid and
  holds the pid of the current process followed by a newline.

  @param name: the daemon name
  @raise errors.GenericError: if the pid currently stored in the file
      belongs to a process which is still alive

  """
  pidfilename = DaemonPidFileName(name)
  recorded_pid = ReadPidFile(pidfilename)
  if IsProcessAlive(recorded_pid):
    raise errors.GenericError("%s contains a live process" % pidfilename)

  WriteFile(pidfilename, data="%d\n" % os.getpid())
1112

    
1113

    
1114
def RemovePidFile(name):
  """Remove the current process pidfile.

  The file removed is the one DaemonPidFileName(name) points to. Any
  errors raised while removing it are ignored.

  @param name: the daemon name

  """
  pidfilename = DaemonPidFileName(name)
  # TODO: we could check here that the file contains our pid
  try:
    RemoveFile(pidfilename)
  except Exception:
    # Best effort only. Unlike a bare "except:", this does not swallow
    # KeyboardInterrupt/SystemExit on Python 2.5 and later, where they
    # no longer derive from Exception.
    pass
1127

    
1128

    
1129
def KillProcess(pid, signal_=signal.SIGTERM, timeout=30):
  """Kill a process given by its pid.

  Nothing is done if the process is not alive. A process which exits
  on its own between the liveness check and the delivery of a signal
  is treated as successfully terminated.

  @type pid: int
  @param pid: The PID to terminate.
  @type signal_: int
  @param signal_: The signal to send, by default SIGTERM
  @type timeout: int
  @param timeout: The timeout after which, if the process is still alive,
                  a SIGKILL will be sent. If not positive, no such checking
                  will be done
  @raise errors.ProgrammerError: if pid is not a positive number
  @raise OSError: if sending a signal fails for any reason other than
      the process not existing anymore

  """
  def _SendSignal(signum):
    """Sends signum to pid, ignoring the error for a vanished process."""
    try:
      os.kill(pid, signum)
    except OSError:
      # sys.exc_info() rather than a bound exception name, so that the
      # clause is valid on every Python version
      if sys.exc_info()[1].errno != errno.ESRCH:
        raise

  if pid <= 0:
    # kill with pid=0 == suicide
    raise errors.ProgrammerError("Invalid pid given '%s'" % pid)

  if not IsProcessAlive(pid):
    return
  _SendSignal(signal_)
  if timeout <= 0:
    return
  end = time.time() + timeout
  # Poll until the process is gone or the timeout expires
  while time.time() < end and IsProcessAlive(pid):
    time.sleep(0.1)
  if IsProcessAlive(pid):
    _SendSignal(signal.SIGKILL)
1156

    
1157

    
1158
def FindFile(name, search_path, test=os.path.exists):
  """Look for a filesystem object in a given path.

  Every directory of search_path is combined with name (joined with
  os.path.sep) and the first combination accepted by test wins.

  @param name: the name to look for
  @param search_path: list of directory names, tried in order
  @param test: the test which the full path must satisfy (defaults to
               os.path.exists)
  @return: the full path to the item if found, None otherwise

  """
  candidates = (os.path.sep.join([dir_name, name])
                for dir_name in search_path)
  for item_name in candidates:
    if test(item_name):
      return item_name
  return None
1180

    
1181

    
1182
def CheckVolumeGroupSize(vglist, vgname, minsize):
  """Checks that a volume group exists and is large enough.

  @type vglist: dict
  @param vglist: maps volume group names to their size
  @param vgname: the volume group to look up in vglist
  @param minsize: the smallest acceptable size
  @return: None if the volume group is fine, otherwise the error
           message describing what is wrong with it

  """
  found_size = vglist.get(vgname)
  if found_size is None:
    return "volume group '%s' missing" % vgname
  if found_size < minsize:
    details = (vgname, minsize, found_size)
    return ("volume group '%s' too small (%s MiB required, %d MiB found)" %
            details)
  return None
1196

    
1197

    
1198
def SplitTime(value):
  """Splits a time given as a number of seconds into a tuple.

  The value is converted to a whole number of microseconds (the
  remainder is truncated) before being split.

  @param value: Time in seconds
  @type value: int or float
  @return: Tuple containing (seconds, microseconds)

  """
  total_microseconds = int(value * 1000000)
  seconds = total_microseconds // 1000000
  microseconds = total_microseconds % 1000000

  assert seconds >= 0, \
    "Seconds must be larger than or equal to 0, but are %s" % seconds
  assert 0 <= microseconds <= 999999, \
    "Microseconds must be 0-999999, but are %s" % microseconds

  return (int(seconds), int(microseconds))
1214

    
1215

    
1216
def MergeTime(timetuple):
  """Merges a (seconds, microseconds) tuple into a single number.

  @param timetuple: Time as tuple, (seconds, microseconds)
  @type timetuple: tuple
  @return: Time as a floating point number expressed in seconds

  """
  seconds, microseconds = timetuple

  assert 0 <= seconds, \
    "Seconds must be larger than or equal to 0, but are %s" % seconds
  assert 0 <= microseconds <= 999999, \
    "Microseconds must be 0-999999, but are %s" % microseconds

  fraction = float(microseconds) * 0.000001
  return float(seconds) + fraction
1232

    
1233

    
1234
def GetNodeDaemonPort():
  """Get the node daemon port for this cluster.

  The port is not read from a ganeti-specific file: the "ganeti-noded"
  tcp service is looked up through socket.getservbyname, which allows
  this parameter to be customized outside of Ganeti.

  @return: the port of the "ganeti-noded" service, or
           constants.DEFAULT_NODED_PORT if the lookup fails

  """
  try:
    return socket.getservbyname("ganeti-noded", "tcp")
  except socket.error:
    return constants.DEFAULT_NODED_PORT
1248

    
1249

    
1250
def GetNodeDaemonPassword():
  """Get the node password for the cluster.

  @return: whatever ReadFile returns for
           constants.CLUSTER_PASSWORD_FILE

  """
  password_file = constants.CLUSTER_PASSWORD_FILE
  return ReadFile(password_file)
1255

    
1256

    
1257
def LockedMethod(fn):
  """Synchronized object access decorator.

  The decorated method runs while holding the lock stored in the
  '_lock' attribute of its instance; the attribute name is hardcoded.
  The lock is released again even if the method raises. Lock activity
  is logged at debug level when the module-level debug_locks flag is
  set.

  @param fn: the method to protect
  @return: the wrapping function

  """
  def _Trace(*args, **kwargs):
    if not debug_locks:
      return
    logging.debug(*args, **kwargs)

  def wrapper(self, *args, **kwargs):
    assert hasattr(self, '_lock')
    lock = self._lock
    _Trace("Waiting for %s", lock)
    lock.acquire()
    try:
      _Trace("Acquired %s", lock)
      return fn(self, *args, **kwargs)
    finally:
      _Trace("Releasing %s", lock)
      lock.release()
      _Trace("Released %s", lock)

  return wrapper
1282

    
1283

    
1284
def LockFile(fd):
  """Locks a file exclusively with flock(), without waiting.

  @param fd: the file (anything fcntl.flock accepts) to lock
  @raise errors.LockError: if the lock cannot be taken right away
      (flock fails with EAGAIN)
  @raise IOError: if flock fails for any other reason

  """
  try:
    fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
  except IOError:
    err = sys.exc_info()[1]
    if err.errno != errno.EAGAIN:
      raise
    raise errors.LockError("File already locked")
1294

    
1295

    
1296
class FileLock(object):
  """Utility class for file locks.

  The file is opened for writing (mode "w") when the object is created
  and stays open until Close() is called or the object is destroyed.
  Locks are taken on that open file with fcntl.flock.

  """
  def __init__(self, filename):
    """Opens the lock file.

    @type filename: str
    @param filename: path of the file used for locking

    """
    # Defined before open() so that Close(), and therefore __del__, also
    # work on an instance whose open() call failed
    self.fd = None
    self.filename = filename
    self.fd = open(self.filename, "w")

  def __del__(self):
    self.Close()

  def Close(self):
    """Closes the lock file; calling it more than once is harmless.

    """
    if self.fd:
      self.fd.close()
      self.fd = None

  def _flock(self, flag, blocking, timeout, errmsg):
    """Wrapper for fcntl.flock.

    @type flag: int
    @param flag: Operation flag
    @type blocking: bool
    @param blocking: Whether the operation should be done in blocking mode.
    @type timeout: None or float
    @param timeout: For how long the operation should be retried (implies
                    non-blocking mode).
    @type errmsg: string
    @param errmsg: Error message in case operation fails.
    @raise errors.LockError: if flock reports EAGAIN and there is no
        timeout or the timeout has expired
    @raise IOError: if flock fails for any other reason

    """
    assert self.fd, "Lock was closed"
    assert timeout is None or timeout >= 0, \
      "If specified, timeout must not be negative"

    # None means that a failed attempt is not retried. Assigned up front
    # so that it is defined in plain blocking mode too.
    timeout_end = None

    if timeout is not None:
      flag |= fcntl.LOCK_NB
      timeout_end = time.time() + timeout

    # Blocking doesn't have effect with timeout
    elif not blocking:
      flag |= fcntl.LOCK_NB

    retry = True
    while retry:
      try:
        fcntl.flock(self.fd, flag)
        retry = False
      except IOError:
        # sys.exc_info() rather than a bound exception name, so that the
        # clause is valid on every Python version
        err = sys.exc_info()[1]
        if err.errno in (errno.EAGAIN, ):
          if timeout_end is not None and time.time() < timeout_end:
            # Wait before trying again
            time.sleep(max(0.1, min(1.0, timeout)))
          else:
            raise errors.LockError(errmsg)
        else:
          logging.exception("fcntl.flock failed")
          raise

  def Exclusive(self, blocking=False, timeout=None):
    """Locks the file in exclusive mode.

    @type blocking: bool
    @param blocking: whether to wait for the lock (ignored if a timeout
                     is given)
    @type timeout: None or float
    @param timeout: for how many seconds to keep retrying
    @raise errors.LockError: if the lock could not be acquired

    """
    self._flock(fcntl.LOCK_EX, blocking, timeout,
                "Failed to lock %s in exclusive mode" % self.filename)

  def Shared(self, blocking=False, timeout=None):
    """Locks the file in shared mode.

    @type blocking: bool
    @param blocking: whether to wait for the lock (ignored if a timeout
                     is given)
    @type timeout: None or float
    @param timeout: for how many seconds to keep retrying
    @raise errors.LockError: if the lock could not be acquired

    """
    self._flock(fcntl.LOCK_SH, blocking, timeout,
                "Failed to lock %s in shared mode" % self.filename)

  def Unlock(self, blocking=True, timeout=None):
    """Unlocks the file.

    According to "man flock", unlocking can also be a nonblocking operation:
    "To make a non-blocking request, include LOCK_NB with any of the above
    operations"

    @type blocking: bool
    @param blocking: whether the operation is done in blocking mode
                     (ignored if a timeout is given)
    @type timeout: None or float
    @param timeout: for how many seconds to keep retrying
    @raise errors.LockError: if the file could not be unlocked

    """
    self._flock(fcntl.LOCK_UN, blocking, timeout,
                "Failed to unlock %s" % self.filename)
1379

    
1380

    
1381
class SignalHandler(object):
1382
  """Generic signal handler class.
1383

1384
  It automatically restores the original handler when deconstructed or when
1385
  Reset() is called. You can either pass your own handler function in or query
1386
  the "called" attribute to detect whether the signal was sent.
1387

1388
  """
1389
  def __init__(self, signum):
    """Constructs a new SignalHandler instance.

    The instance's own handler is installed for every given signal and
    the handler it replaces is remembered in self._previous. If
    anything goes wrong, the handlers replaced so far are restored
    before the exception is re-raised.

    @param signum: Single signal number or set of signal numbers

    """
    if isinstance(signum, (int, long)):
      signals = [signum]
    else:
      signals = signum
    self.signum = set(signals)

    self.called = False

    self._previous = {}
    try:
      for signo in self.signum:
        # Setup handler
        old_handler = signal.signal(signo, self._HandleSignal)
        try:
          self._previous[signo] = old_handler
        except:
          # Restore previous handler
          signal.signal(signo, old_handler)
          raise
    except:
      # Reset all handlers
      self.Reset()
      # Here we have a race condition: a handler may have already been called,
      # but there's not much we can do about it at this point.
      raise
1419

    
1420
  def __del__(self):
1421
    self.Reset()
1422

    
1423
  def Reset(self):
1424
    """Restore previous handler.
1425

1426
    """
1427
    for signum, prev_handler in self._previous.items():
1428
      signal.signal(signum, prev_handler)
1429
      # If successful, remove from dict
1430
      del self._previous[signum]
1431

    
1432
  def Clear(self):
1433
    """Unsets "called" flag.
1434

1435
    This function can be used in case a signal may arrive several times.
1436

1437
    """
1438
    self.called = False
1439

    
1440
  def _HandleSignal(self, signum, frame):
1441
    """Actual signal handling function.
1442

1443
    """
1444
    # This is not nice and not absolutely atomic, but it appears to be the only
1445
    # solution in Python -- there are no atomic types.
1446
    self.called = True