Statistics
| Branch: | Tag: | Revision:

root / lib / utils.py @ 8b3fd458

History | View | Annotate | Download (36.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Ganeti small utilities
23

24
"""
25

    
26

    
27
import sys
28
import os
29
import sha
30
import time
31
import subprocess
32
import re
33
import socket
34
import tempfile
35
import shutil
36
import errno
37
import pwd
38
import itertools
39
import select
40
import fcntl
41
import resource
42
import logging
43
import signal
44

    
45
from cStringIO import StringIO
46

    
47
from ganeti import errors
48
from ganeti import constants
49

    
50

    
51
# NOTE(review): not referenced anywhere in the visible part of this file.
_locksheld = []
# Arguments consisting solely of these characters are handed to the shell
# without quoting. The pattern is anchored with "\Z" rather than "$" because
# "$" also matches just before a trailing newline, which would let a value
# such as "abc\n" be treated as safe.
_re_shell_unquoted = re.compile(r'^[-.,=:/_+@A-Za-z0-9]+\Z')

debug = False
debug_locks = False
# When set to True, RunCmd() refuses to start child processes
no_fork = False
57

    
58

    
59
class RunResult(object):
  """Value object describing how an external command ended.

  Instance variables:
    cmd: the command that was run, as passed to the constructor
    exit_code: exit code of the program, or None if it did not exit()
    signal: number of the signal that ended the program, or None if it
            was not terminated by a signal
    stdout: captured standard output
    stderr: captured standard error
    failed: True if the program was ended by a signal or its exit code
            is not zero
    fail_reason: human-readable description of how the program ended
    output: read-only property, stdout followed by stderr

  """
  __slots__ = ["exit_code", "signal", "stdout", "stderr",
               "failed", "fail_reason", "cmd"]

  def __init__(self, exit_code, signal_, stdout, stderr, cmd):
    self.cmd = cmd
    self.exit_code = exit_code
    self.signal = signal_
    self.stdout = stdout
    self.stderr = stderr
    # Success means: no signal involved and a zero exit code
    self.failed = not (signal_ is None and exit_code == 0)

    if signal_ is not None:
      reason = "terminated by signal %s" % signal_
    elif exit_code is not None:
      reason = "exited with exit code %s" % exit_code
    else:
      reason = "unable to determine termination reason"
    self.fail_reason = reason

    if self.failed:
      logging.debug("Command '%s' failed (%s); output: %s",
                    self.cmd, self.fail_reason, self.output)

  def _GetOutput(self):
    """Return stdout and stderr concatenated, in that order.

    """
    return self.stdout + self.stderr

  output = property(fget=_GetOutput, doc="Return full output")
104

    
105

    
106
def RunCmd(cmd, env=None):
  """Run an external command and capture its output.

  The child's standard input is closed right after start-up, so the
  command must not try to read from it.

  @type  cmd: string or list
  @param cmd: command to run; a list is executed directly (each element
      is converted with str()), anything else is passed to the shell
  @type env: dict
  @param env: additional environment variables; they are applied on top
      of a copy of the current environment in which LC_ALL is set to "C"
  @rtype: RunResult
  @return: exit code or signal, plus the collected stdout and stderr
  @raise errors.ProgrammerError: if the module-level no_fork flag is set

  """
  if no_fork:
    raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled")

  use_shell = not isinstance(cmd, list)
  if use_shell:
    strcmd = cmd
  else:
    cmd = [str(val) for val in cmd]
    strcmd = " ".join(cmd)
  logging.debug("RunCmd '%s'", strcmd)

  child_env = os.environ.copy()
  child_env["LC_ALL"] = "C"
  if env is not None:
    child_env.update(env)

  poller = select.poll()
  child = subprocess.Popen(cmd, shell=use_shell,
                           stderr=subprocess.PIPE,
                           stdout=subprocess.PIPE,
                           stdin=subprocess.PIPE,
                           close_fds=True, env=child_env)

  child.stdin.close()
  for pipe in (child.stdout, child.stderr):
    poller.register(pipe, select.POLLIN)

  out_buf = StringIO()
  err_buf = StringIO()
  # file descriptor -> (buffer collecting the data, pipe to read from)
  pending = {
    child.stdout.fileno(): (out_buf, child.stdout),
    child.stderr.fileno(): (err_buf, child.stderr),
    }
  # Switch both pipes to non-blocking mode before reading from them
  for fd in pending:
    flags = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)

  readable_mask = select.POLLIN | select.POLLPRI
  closed_mask = select.POLLNVAL | select.POLLHUP | select.POLLERR

  # Keep polling until both pipes have been retired
  while pending:
    for fd, event in poller.poll():
      finished = False
      if event & readable_mask:
        data = pending[fd][1].read()
        if data:
          pending[fd][0].write(data)
        else:
          # no data from read signifies EOF (the same as POLLHUP)
          finished = True
      if not finished and event & closed_mask:
        finished = True
      if finished:
        poller.unregister(fd)
        del pending[fd]

  status = child.wait()
  if status < 0:
    # A negative status is the negated number of the terminating signal
    exitcode, signal_ = None, -status
  else:
    exitcode, signal_ = status, None

  return RunResult(exitcode, signal_, out_buf.getvalue(), err_buf.getvalue(),
                   strcmd)
184

    
185

    
186
def RemoveFile(filename):
  """Unlink a file, tolerating the harmless failure cases.

  A path that does not exist (ENOENT) or that is a directory (EISDIR)
  is silently left alone; every other OSError is re-raised.

  @type filename: str
  @param filename: path of the file to remove

  """
  ignorable = (errno.ENOENT, errno.EISDIR)
  try:
    os.unlink(filename)
  except OSError:
    # sys.exc_info() keeps the handler independent of the Python version
    if sys.exc_info()[1].errno in ignorable:
      return
    raise
198

    
199

    
200
def _FingerprintFile(filename):
  """Compute the SHA-1 fingerprint of a regular file.

  @type filename: str
  @param filename: path of the file to fingerprint
  @rtype: str or None
  @return: hexadecimal SHA-1 digest of the file contents, or None if
      the path does not exist or is not a regular file

  """
  # The "sha" module is deprecated in favour of hashlib; fall back to it
  # only on interpreters that do not ship hashlib yet
  try:
    from hashlib import sha1 as new_sha1
  except ImportError:
    from sha import new as new_sha1

  if not os.path.isfile(filename):
    return None

  fp = new_sha1()
  # Binary mode: the digest must cover the exact bytes on disk
  f = open(filename, "rb")
  try:
    while True:
      data = f.read(4096)
      if not data:
        break
      fp.update(data)
  finally:
    # Do not leak the descriptor, even if reading fails
    f.close()

  return fp.hexdigest()


def FingerprintFiles(files):
  """Compute fingerprints for a list of files.

  @type files: list of str
  @param files: the file names to fingerprint
  @rtype: dict
  @return: mapping of file name to fingerprint, containing only the
      entries that exist and are regular files

  """
  ret = {}

  for filename in files:
    cksum = _FingerprintFile(filename)
    if cksum:
      ret[filename] = cksum

  return ret
244

    
245

    
246
def CheckDict(target, template, logname=None):
  """Ensure a dictionary has a required set of keys.

  Every key of `template` that is absent from `target` is added to
  `target` (which is modified in place) with the template's value.

  @type target: dict
  @param target: the dictionary to check and complete
  @type template: dict
  @param template: dictionary providing the required keys and defaults
  @type logname: str or None
  @param logname: caller-chosen string identifying the log entry; if
      empty or None, nothing is logged
  @return: None

  """
  missing = []
  for key in template:
    if key not in target:
      missing.append(key)
      target[key] = template[key]

  if missing and logname:
    # Keys need not be strings, so format them before joining
    logging.warning('%s missing keys %s', logname,
                    ', '.join(["%s" % (key,) for key in missing]))
271

    
272

    
273
def IsProcessAlive(pid):
  """Check if a given pid exists on the system.

  The check is done by reading /proc/<pid>/status. Zombie processes are
  treated as not alive, and a pid <= 0 always yields False.

  @type pid: int
  @param pid: the process id to check
  @rtype: bool
  @return: True if the process exists and is not a zombie
  @raise IOError: if the status file cannot be opened for a reason other
      than the process being absent

  """
  if pid <= 0:
    return False

  try:
    f = open("/proc/%d/status" % pid)
  except IOError:
    if sys.exc_info()[1].errno in (errno.ENOENT, errno.ENOTDIR):
      return False
    # Any other failure says nothing about the process; report it
    # instead of continuing without an open file
    raise

  try:
    # Look the "State:" line up by name: its position in the file is
    # not the same on all kernels
    for line in f:
      fields = line.split()
      if fields and fields[0] == "State:":
        return len(fields) < 2 or fields[1] != "Z"
  finally:
    f.close()

  return True
302

    
303

    
304
def ReadPidFile(pidfile):
  """Read the pid from a file.

  @type  pidfile: string (filename)
  @param pidfile: path to a file containing the pid to be read
  @rtype: int
  @return: the process id, if the file exists and contains a valid PID,
      otherwise 0

  """
  try:
    pf = open(pidfile, 'r')
  except EnvironmentError:
    # A missing file is an expected situation and not worth logging
    if sys.exc_info()[1].errno != errno.ENOENT:
      logging.exception("Can't read pid file?!")
    return 0

  try:
    try:
      pid = int(pf.read())
    except ValueError:
      logging.info("Can't parse pid file contents", exc_info=True)
      return 0
  finally:
    # Always release the descriptor, whatever the parsing outcome
    pf.close()

  return pid
328

    
329

    
330
def MatchNameComponent(key, name_list):
  """Find the single name in a list that a (possibly shortened) key denotes.

  A key matches a name if the name equals the key or consists of the key
  followed by a dot and anything else. Given
  ['test1.example.com', 'test2.example.com'], both 'test1' and
  'test1.example' select the first entry, whereas 'test1.ex' selects
  nothing. Ambiguous keys are rejected: 'test1' against
  ['test1.example.com', 'test1.example.org'] gives no result.

  Args:
    key: the name to look for
    name_list: the list of strings to search

  Returns:
    the matching element of name_list, or None if no element or more
    than one element matches

  """
  pattern = re.compile(r"^%s(\..*)?$" % re.escape(key))

  candidates = []
  for name in name_list:
    if pattern.match(name) is not None:
      candidates.append(name)

  if len(candidates) == 1:
    return candidates[0]
  return None
353

    
354

    
355
class HostInfo:
  """Class implementing resolver and hostname functionality

  Instance variables (all set by the constructor):
    query: the name the lookup was made for
    name, aliases, ipaddrs: the three elements returned by LookupHostname
    ip: the first entry of ipaddrs

  """
  def __init__(self, name=None):
    """Initialize the host name object.

    If the name argument is not passed, it will use this system's
    name.

    @raise errors.ResolverError: if the name cannot be resolved

    """
    if name is None:
      name = self.SysName()

    self.query = name
    self.name, self.aliases, self.ipaddrs = self.LookupHostname(name)
    self.ip = self.ipaddrs[0]

  def ShortName(self):
    """Returns the hostname without domain.

    """
    return self.name.split('.')[0]

  @staticmethod
  def SysName():
    """Return the current system's name.

    This is simply a wrapper over socket.gethostname()

    """
    return socket.gethostname()

  @staticmethod
  def LookupHostname(hostname):
    """Look up hostname

    Args:
      hostname: hostname to look up

    Returns:
      a tuple (name, aliases, ipaddrs) as returned by socket.gethostbyname_ex
      in case of errors in resolving, we raise a ResolverError

    """
    try:
      result = socket.gethostbyname_ex(hostname)
    except (socket.gaierror, socket.herror):
      # gethostbyname_ex() can report resolution failures with either
      # exception type; both carry (code, message) in their args
      err = sys.exc_info()[1]
      raise errors.ResolverError(hostname, err.args[0], err.args[1])

    return result
407

    
408

    
409
def ListVolumeGroups():
  """Query the volume groups and their sizes through the "vgs" command.

  Output lines that cannot be split into a name and a numeric size are
  logged and skipped.

  Returns:
     Dictionary mapping volume group name to its size as reported by
     "vgs --units m", truncated to an integer; empty if the command
     failed

  """
  result = RunCmd("vgs --noheadings --units m --nosuffix -o name,size")
  vg_sizes = {}

  if not result.failed:
    for line in result.stdout.splitlines():
      try:
        name, size = line.split()
        size = int(float(size))
      except (IndexError, ValueError):
        logging.error("Invalid output from vgs (%s): %s",
                      sys.exc_info()[1], line)
        continue

      vg_sizes[name] = size

  return vg_sizes
433

    
434

    
435
def BridgeExists(bridge):
  """Tell whether a network bridge with the given name is present.

  The check looks for the directory /sys/class/net/<bridge>/bridge.

  Returns:
     True if that directory exists, False otherwise.

  """
  bridge_dir = "/sys/class/net/%s/bridge" % bridge
  return os.path.isdir(bridge_dir)
443

    
444

    
445
def NiceSort(name_list):
  """Sort strings so that embedded numbers are ordered by value.

  For example ['a1', 'a10', 'a11', 'a2'] is returned as
  ['a1', 'a2', 'a10', 'a11'].

  Each name is cut into runs of only-digits and no-digits characters.
  The first eight runs form the sort key (digit runs converted to
  integers); names whose keys are equal are ordered by the full name.

  Return value
    - a new list with the names in the order described above

  """
  group = r"(\D+|\d+)"
  # One mandatory run followed by up to seven optional ones
  splitter = re.compile("^" + group + (group + "?") * 7 + ".*$")
  no_digit = re.compile(r"^\D*$")

  def _SortKey(name):
    """Build the list of runs for a name, with digit runs as integers."""
    key = []
    for grp in splitter.match(name).groups():
      # Unused optional groups are None and are kept that way
      if grp is not None and not no_digit.match(grp):
        grp = int(grp)
      key.append(grp)
    return key

  decorated = [(_SortKey(name), name) for name in name_list]
  decorated.sort()
  return [name for (_, name) in decorated]
477

    
478

    
479
def TryConvert(fn, val):
  """Apply a conversion function, falling back to the input on failure.

  `fn` is called with `val`. Its result is returned unless the call
  raises ValueError or TypeError, in which case `val` itself is
  returned unchanged. All other exceptions reach the caller.

  """
  try:
    return fn(val)
  except (ValueError, TypeError):
    return val
493

    
494

    
495
def IsValidIP(ip):
  """Verifies the syntax of an IPv4 address.

  The address must consist of exactly four dot-separated decimal numbers
  between 0 and 255, written without leading zeros. Only the syntax is
  checked; nothing is assumed about networks or address classes.

  @type ip: str
  @param ip: the address to check
  @rtype: bool
  @return: True if the address is well-formed, False otherwise

  """
  octet = r"(0|[1-9]\d{0,2})"
  # "\Z" (unlike "$") does not tolerate a trailing newline
  m = re.match(r"^%s\.%s\.%s\.%s\Z" % ((octet,) * 4), ip)
  if m is None:
    return False

  # The pattern allows up to three digits per part; enforce the range
  for part in m.groups():
    if int(part) > 255:
      return False
  return True
504

    
505

    
506
def IsValidShellParam(word):
  """Verifies if the given word is safe from the shell's p.o.v.

  This means that we can pass this to a command via the shell and be
  sure that it doesn't alter the command line and is passed as such to
  the actual command.

  Note that we are overly restrictive here, in order to be on the safe
  side.

  @type word: str
  @param word: the word to check
  @rtype: bool
  @return: True if the word is non-empty and made up only of letters,
      digits and the characters - . _ + / : % @

  """
  # Anchor with "\Z": "$" would also accept a word ending in a newline,
  # which the shell treats as a command separator
  return bool(re.match(r"^[-a-zA-Z0-9._+/:%@]+\Z", word))
518

    
519

    
520
def BuildShellCmd(template, *args):
  """Fill a shell command template with vetted arguments.

  Every argument must pass IsValidShellParam, i.e. must not contain
  shell metacharacters. If they all do, the result of template % args
  is returned.

  @raise errors.ProgrammerError: for the first argument that does not
      pass the check

  """
  for word in args:
    if IsValidShellParam(word):
      continue
    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % word)
  return template % args
534

    
535

    
536
def FormatUnit(value):
  """Render a number of MiB with the best fitting unit suffix.

  Values below 1024 are shown as whole "M", values below 1024 * 1024 as
  "G" with one decimal and everything else as "T" with one decimal.

  The value must be numeric; the result is always a string.

  """
  if value < 1024:
    return "%dM" % round(value, 0)

  if value < (1024 * 1024):
    gib = float(value) / 1024
    return "%0.1fG" % round(gib, 1)

  tib = float(value) / 1024 / 1024
  return "%0.1fT" % round(tib, 1)
550

    
551

    
552
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format NUMBER+ [DOT NUMBER+] SPACE* [UNIT]. If no unit
  is specified, it defaults to MiB. Recognized units (case-insensitive) are
  m/mb/mib, g/gb/gib and t/tb/tib.

  @type input_string: str
  @param input_string: the text to parse
  @rtype: int
  @return: the size in MiB, rounded up to a whole number and then up to
      the next multiple of 4
  @raise errors.UnitParseError: if the text does not have the expected
      format, the number is malformed or the unit is unknown

  """
  m = re.match(r'^([.\d]+)\s*([a-zA-Z]+)?$', input_string)
  if not m:
    raise errors.UnitParseError("Invalid format")

  number, unit = m.groups()

  # The pattern also lets through strings like "1.2.3" or "."; report
  # them as a parse error instead of leaking float()'s ValueError
  try:
    value = float(number)
  except ValueError:
    raise errors.UnitParseError("Invalid number: %s" % number)

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = 'm'

  if lcunit in ('m', 'mb', 'mib'):
    # Value already in MiB
    pass

  elif lcunit in ('g', 'gb', 'gib'):
    value *= 1024

  elif lcunit in ('t', 'tb', 'tib'):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
594

    
595

    
596
def AddAuthorizedKey(file_name, key):
  """Adds an SSH public key to an authorized_keys file.

  The key is appended unless the file already contains a line that is
  equal to it when whitespace differences are ignored. The file is
  created if it does not exist.

  Args:
    file_name: Path to authorized_keys file
    key: String containing key
  """
  key_fields = key.split()

  f = open(file_name, 'a+')
  try:
    # Where reading starts on a stream opened in append mode depends on
    # the platform, so rewind explicitly before scanning the file
    f.seek(0)

    ends_with_newline = True
    for line in f:
      # Ignore whitespace changes
      if line.split() == key_fields:
        # Key already present, nothing to do
        return
      ends_with_newline = line.endswith('\n')

    # Reposition explicitly before switching from reading to writing
    f.seek(0, 2)
    if not ends_with_newline:
      # Do not glue the key onto an unterminated last line
      f.write("\n")
    f.write(key.rstrip('\r\n'))
    f.write("\n")
    f.flush()
  finally:
    f.close()
621

    
622

    
623
def RemoveAuthorizedKey(file_name, key):
  """Removes an SSH public key from an authorized_keys file.

  All lines equal to the key (ignoring whitespace differences) are
  dropped. The new contents are written to a temporary file in the same
  directory, which then replaces the original via rename; on any error
  the temporary file is removed and the exception re-raised.

  Args:
    file_name: Path to authorized_keys file
    key: String containing key
  """
  key_fields = key.split()

  fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
  try:
    out = os.fdopen(fd, 'w')
    try:
      f = open(file_name, 'r')
      try:
        for line in f:
          # Ignore whitespace changes while comparing lines
          if line.split() != key_fields:
            out.write(line)

        out.flush()
        # mkstemp() creates the file with mode 0600; carry over the
        # permission bits of the file being replaced
        shutil.copymode(file_name, tmpname)
        os.rename(tmpname, file_name)
      finally:
        f.close()
    finally:
      out.close()
  except:
    RemoveFile(tmpname)
    raise
652

    
653

    
654
def SetEtcHostsEntry(file_name, ip, hostname, aliases):
  """Sets the name of an IP address and hostname in /etc/hosts.

  Existing lines whose first field equals the IP address are dropped and
  a single new line "<ip>\\t<hostname> <aliases...>" is appended. The
  result is written to a temporary file in the same directory, which
  then replaces the original via rename; on any error the temporary file
  is removed and the exception re-raised.

  @type file_name: str
  @param file_name: path of the hosts file to update
  @type ip: str
  @param ip: the IP address of the entry
  @type hostname: str
  @param hostname: the primary name for the address
  @type aliases: list of str
  @param aliases: additional names; duplicates and the hostname itself
      are dropped

  """
  # Ensure aliases are unique
  aliases = UniqueSequence([hostname] + aliases)[1:]

  fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
  try:
    out = os.fdopen(fd, 'w')
    try:
      f = open(file_name, 'r')
      try:
        terminated = True
        for line in f:
          fields = line.split()
          if fields and not fields[0].startswith('#') and ip == fields[0]:
            continue
          out.write(line)
          terminated = line.endswith('\n')

        if not terminated:
          # Do not glue the new entry onto an unterminated last line
          out.write('\n')

        out.write("%s\t%s" % (ip, hostname))
        if aliases:
          out.write(" %s" % ' '.join(aliases))
        out.write('\n')

        out.flush()
        os.fsync(out)
        # mkstemp() creates the file with mode 0600, which would make the
        # hosts file unreadable for other users; carry over the
        # permission bits of the file being replaced
        shutil.copymode(file_name, tmpname)
        os.rename(tmpname, file_name)
      finally:
        f.close()
    finally:
      out.close()
  except:
    RemoveFile(tmpname)
    raise
689

    
690

    
691
def AddHostToEtcHosts(hostname):
  """Resolve a host and record it in the file named by constants.ETC_HOSTS.

  The entry is written through SetEtcHostsEntry, using the resolved
  name and IP address, with the short host name as the only alias.

  """
  host = HostInfo(name=hostname)
  aliases = [host.ShortName()]
  SetEtcHostsEntry(constants.ETC_HOSTS, host.ip, host.name, aliases)
697

    
698

    
699
def RemoveEtcHostsEntry(file_name, hostname):
  """Removes a hostname from /etc/hosts.

  The name is removed from every non-comment line that lists it; an
  address left without any name is removed from the file altogether.
  The result is written to a temporary file in the same directory, which
  then replaces the original via rename; on any error the temporary file
  is removed and the exception re-raised.

  @type file_name: str
  @param file_name: path of the hosts file to update
  @type hostname: str
  @param hostname: the name to remove
  """
  fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
  try:
    out = os.fdopen(fd, 'w')
    try:
      f = open(file_name, 'r')
      try:
        for line in f:
          fields = line.split()
          if len(fields) > 1 and not fields[0].startswith('#'):
            names = fields[1:]
            if hostname in names:
              remaining = [name for name in names if name != hostname]
              if remaining:
                out.write("%s %s\n" % (fields[0], ' '.join(remaining)))
              continue

          out.write(line)

        out.flush()
        os.fsync(out)
        # mkstemp() creates the file with mode 0600, which would make the
        # hosts file unreadable for other users; carry over the
        # permission bits of the file being replaced
        shutil.copymode(file_name, tmpname)
        os.rename(tmpname, file_name)
      finally:
        f.close()
    finally:
      out.close()
  except:
    RemoveFile(tmpname)
    raise
733

    
734

    
735
def RemoveHostFromEtcHosts(hostname):
  """Resolve a host and drop it from the file named by constants.ETC_HOSTS.

  Both the resolved name and the short host name are removed, each
  through its own RemoveEtcHostsEntry call.

  """
  host = HostInfo(name=hostname)
  for entry in (host.name, host.ShortName()):
    RemoveEtcHostsEntry(constants.ETC_HOSTS, entry)
742

    
743

    
744
def CreateBackup(file_name):
  """Copy a file to a uniquely named backup next to it.

  The backup is created with tempfile.mkstemp in the directory of the
  original; its name starts with "<basename>.backup-<unix time>.".

  @type file_name: str
  @param file_name: path of the file to back up
  @rtype: str
  @return: the path to the newly created backup file
  @raise errors.ProgrammerError: if file_name is not a regular file

  """
  if not os.path.isfile(file_name):
    raise errors.ProgrammerError("Can't make a backup of a non-file '%s'" %
                                file_name)

  dir_name, base_name = os.path.split(file_name)
  prefix = '%s.backup-%d.' % (base_name, int(time.time()))

  source = open(file_name, 'rb')
  try:
    (fd, backup_name) = tempfile.mkstemp(prefix=prefix, dir=dir_name)
    target = os.fdopen(fd, 'wb')
    try:
      shutil.copyfileobj(source, target)
    finally:
      target.close()
  finally:
    source.close()

  return backup_name
769

    
770

    
771
def ShellQuote(value):
  """Quote a single shell argument following POSIX rules.

  Values accepted by the module-level _re_shell_unquoted pattern are
  returned as they are. Anything else is wrapped in single quotes, with
  each embedded single quote replaced by the sequence '\\''.

  """
  if not _re_shell_unquoted.match(value):
    return "'%s'" % value.replace("'", "'\\''")
  return value
779

    
780

    
781
def ShellQuoteArgs(args):
  """Quote every argument with ShellQuote and join them with spaces.

  """
  quoted = []
  for arg in args:
    quoted.append(ShellQuote(arg))
  return ' '.join(quoted)
786

    
787

    
788
def TcpPing(target, port, timeout=10, live_port_needed=False, source=None):
  """Simple ping implementation using TCP connect(2).

  Try to do a TCP connect(2) from an optional source IP to the
  specified target IP and the specified target port.

  @type target: str
  @param target: the address to connect to
  @type port: int
  @param port: the TCP port to connect to
  @type timeout: number
  @param timeout: connection timeout in seconds
  @type live_port_needed: bool
  @param live_port_needed: if true, the remote end must accept the
      connection; otherwise a refused connection also counts as success
  @type source: str or None
  @param source: address to bind to before connecting; if None the
      source address selection is left to the kernel. Failing to bind
      with EADDRNOTAVAIL makes the ping fail, any other bind failure is
      ignored
  @rtype: bool
  @return: True if the target answered as required, False otherwise

  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  try:
    if source is not None:
      try:
        sock.bind((source, 0))
      except socket.error:
        # args[:1] is the error code, or empty if the exception has none
        if sys.exc_info()[1].args[:1] == (errno.EADDRNOTAVAIL,):
          # The requested source address is not ours; connecting from
          # another address would not answer the question being asked
          return False

    sock.settimeout(timeout)

    try:
      sock.connect((target, port))
    except socket.timeout:
      return False
    except socket.error:
      refused = sys.exc_info()[1].args[:1] == (errno.ECONNREFUSED,)
      return (not live_port_needed) and refused

    return True
  finally:
    # Release the socket on every path, not only after a successful connect
    sock.close()
824

    
825

    
826
def OwnIpAddress(address):
  """Check if the current host has the given IP address.

  The check is a TcpPing of the address on constants.DEFAULT_NODED_PORT,
  with constants.LOCALHOST_IP_ADDRESS as the source address.

  @type address: string
  @param address: the address to check
  @rtype: bool
  @return: the result of the TCP ping

  """
  source_ip = constants.LOCALHOST_IP_ADDRESS
  return TcpPing(address, constants.DEFAULT_NODED_PORT, source=source_ip)
839

    
840

    
841
def ListVisibleFiles(path):
  """Return the sorted names of the non-hidden entries of a directory.

  Entries whose name starts with a dot are left out.

  """
  visible = []
  for entry in os.listdir(path):
    if not entry.startswith("."):
      visible.append(entry)
  visible.sort()
  return visible
848

    
849

    
850
def GetHomeDir(user, default=None):
  """Try to get the homedir of the given user.

  @type user: string or integer
  @param user: the user, either by name (string) or by user id (integer)
  @param default: value to return if the user is not found
  @return: the home directory from the password database, or `default`
      if there is no such user
  @raise errors.ProgrammerError: if user is neither a string nor an
      integer

  """
  # "basestring" and "long" exist only on Python 2; use the plain types
  # where they are not available
  try:
    name_types = basestring
    uid_types = (int, long)
  except NameError:
    name_types = str
    uid_types = (int,)

  try:
    if isinstance(user, name_types):
      result = pwd.getpwnam(user)
    elif isinstance(user, uid_types):
      result = pwd.getpwuid(user)
    else:
      raise errors.ProgrammerError("Invalid type passed to GetHomeDir (%s)" %
                                   type(user))
  except KeyError:
    # This is how the pwd lookups report an unknown user
    return default
  return result.pw_dir
869

    
870

    
871
def NewUUID():
  """Return a fresh UUID read from /proc/sys/kernel/random/uuid.

  At most 128 characters are read; trailing newlines are stripped.

  """
  uuid_file = open("/proc/sys/kernel/random/uuid", "r")
  try:
    raw = uuid_file.read(128)
  finally:
    uuid_file.close()
  return raw.rstrip("\n")
880

    
881

    
882
def GenerateSecret():
  """Generates a random secret.

  This will read 64 bytes from os.urandom and return their SHA-1 digest
  in hexadecimal form (so that it can be used where an ASCII string is
  needed).

  @rtype: str
  @return: a 40-character hexadecimal string

  """
  # The "sha" module is deprecated in favour of hashlib; fall back to it
  # only on interpreters that do not ship hashlib yet
  try:
    from hashlib import sha1 as new_sha1
  except ImportError:
    from sha import new as new_sha1

  return new_sha1(os.urandom(64)).hexdigest()
890

    
891

    
892
def ReadFile(file_name, size=None):
  """Return the contents of a file opened in "r" mode.

  @type file_name: str
  @param file_name: path of the file to read
  @type size: None or int
  @param size: read at most this much; None means the whole file
  @return: what was read from the file

  """
  handle = open(file_name, "r")
  try:
    if size is not None:
      return handle.read(size)
    return handle.read()
  finally:
    handle.close()
907

    
908

    
909
def WriteFile(file_name, fn=None, data=None,
              mode=None, uid=-1, gid=-1,
              atime=None, mtime=None, close=True,
              dry_run=False, backup=False,
              prewrite=None, postwrite=None):
  """(Over)write a file atomically.

  The content is written to a temporary file created next to the target
  and then moved over the target with rename. Exactly one of fn and data
  must be passed.

  If no exception is raised, the target file has the new contents
  (unless dry_run is set). In all cases the temporary file is removed
  before returning.

  Args:
    file_name: Absolute path of the file to write
    fn: Content writing function, called with file descriptor as parameter
    data: Content as string
    mode: File mode; only applied if it evaluates to true
    uid: Owner; -1 leaves it unchanged
    gid: Group; -1 leaves it unchanged
    atime: Access time; must be passed together with mtime
    mtime: Modification time; must be passed together with atime
    close: Whether to close file after writing it
    dry_run: If true, do everything except the final rename
    backup: If true (and not a dry run), call CreateBackup on an existing
            target file before it is replaced
    prewrite: Function object called with the file descriptor before
              writing content
    postwrite: Function object called with the file descriptor after
               writing content

  Returns:
    None if "close" parameter evaluates to True, otherwise file descriptor.

  Raises:
    errors.ProgrammerError: for a relative path, if not exactly one of
      fn/data is given, or if only one of atime/mtime is given

  """
  if not os.path.isabs(file_name):
    raise errors.ProgrammerError("Path passed to WriteFile is not"
                                 " absolute: '%s'" % file_name)

  # Exactly one source of content is allowed
  if (fn is None) == (data is None):
    raise errors.ProgrammerError("fn or data required")

  # The two timestamps only make sense as a pair
  if (atime is None) != (mtime is None):
    raise errors.ProgrammerError("Both atime and mtime must be either"
                                 " set or None")

  if backup and not dry_run and os.path.isfile(file_name):
    CreateBackup(file_name)

  target_dir, target_base = os.path.split(file_name)
  fd, tmp_path = tempfile.mkstemp(suffix='.new', prefix=target_base,
                                  dir=target_dir)
  # From here on the temporary file must not be left behind, whatever
  # happens
  try:
    if uid != -1 or gid != -1:
      os.chown(tmp_path, uid, gid)
    if mode:
      os.chmod(tmp_path, mode)
    if callable(prewrite):
      prewrite(fd)
    if data is None:
      fn(fd)
    else:
      os.write(fd, data)
    if callable(postwrite):
      postwrite(fd)
    os.fsync(fd)
    if atime is not None and mtime is not None:
      os.utime(tmp_path, (atime, mtime))
    if not dry_run:
      os.rename(tmp_path, file_name)
  finally:
    if close:
      os.close(fd)
      result = None
    else:
      result = fd
    # Nothing is left to remove after a successful rename; RemoveFile
    # ignores that case
    RemoveFile(tmp_path)

  return result
989

    
990

    
991
def FirstFree(seq, base=0):
  """Returns the first non-existing integer from seq.

  The seq argument should be a sorted list of positive integers. The
  first time the index of an element (plus base) is smaller than the
  element value, that index (plus base) will be returned.

  The base argument is used to start at a different offset,
  i.e. [3, 4, 6] with base=3 will return 5.

  Example: [0, 1, 3] will return 2.

  @type seq: sequence of int
  @param seq: the sorted integers already in use
  @type base: int
  @param base: the lowest value that may be returned
  @rtype: int or None
  @return: the first unused integer, or None if seq has no gap

  """
  for idx, elem in enumerate(seq):
    assert elem >= base, "Passed element is lower than base offset"
    if elem > idx + base:
      # idx is not used
      return idx + base
  return None
1010

    
1011

    
1012
def all(seq, pred=bool):
  "Returns True if pred(x) is True for every element in the iterable"
  if pred is None:
    # A None predicate means plain truth testing
    pred = bool
  for elem in seq:
    if not pred(elem):
      return False
  return True
1017

    
1018

    
1019
def any(seq, pred=bool):
  "Returns True if pred(x) is True for at least one element in the iterable"
  if pred is None:
    # A None predicate means plain truth testing
    pred = bool
  for elem in seq:
    if pred(elem):
      return True
  return False
1024

    
1025

    
1026
def UniqueSequence(seq):
  """Return the elements of seq as a list without duplicates.

  Only the first occurrence of each element is kept, so the original
  order is preserved.
  """
  seen = set()
  unique = []
  for item in seq:
    if item in seen:
      continue
    seen.add(item)
    unique.append(item)
  return unique
1033

    
1034

    
1035
def IsValidMac(mac):
  """Predicate to check if a MAC address is valid.

  Checks whether the supplied MAC address is formally correct: exactly
  six groups of two lowercase hexadecimal digits, separated by colons,
  with nothing before or after them.

  @type mac: str
  @param mac: the MAC address to check
  @rtype: bool
  @return: True if the address is well-formed, False otherwise
  """
  # One group followed by exactly five ":group" repetitions; "\Z" rejects
  # a trailing newline, and no separator is allowed after the last group
  mac_check = re.compile(r"^[0-9a-f]{2}(:[0-9a-f]{2}){5}\Z")
  return mac_check.match(mac) is not None
1043

    
1044

    
1045
def TestDelay(duration):
  """Sleep for the given number of seconds.

  @param duration: how long to sleep
  @rtype: bool
  @return: False (without sleeping) if duration is negative, True after
      the sleep otherwise

  """
  acceptable = not (duration < 0)
  if acceptable:
    time.sleep(duration)
  return acceptable
1053

    
1054

    
1055
def Daemonize(logfile, noclose_fds=None):
1056
  """Daemonize the current process.
1057

1058
  This detaches the current process from the controlling terminal and
1059
  runs it in the background as a daemon.
1060

1061
  """
1062
  UMASK = 077
1063
  WORKDIR = "/"
1064
  # Default maximum for the number of available file descriptors.
1065
  if 'SC_OPEN_MAX' in os.sysconf_names:
1066
    try:
1067
      MAXFD = os.sysconf('SC_OPEN_MAX')
1068
      if MAXFD < 0:
1069
        MAXFD = 1024
1070
    except OSError:
1071
      MAXFD = 1024
1072
  else:
1073
    MAXFD = 1024
1074

    
1075
  # this might fail
1076
  pid = os.fork()
1077
  if (pid == 0):  # The first child.
1078
    os.setsid()
1079
    # this might fail
1080
    pid = os.fork() # Fork a second child.
1081
    if (pid == 0):  # The second child.
1082
      os.chdir(WORKDIR)
1083
      os.umask(UMASK)
1084
    else:
1085
      # exit() or _exit()?  See below.
1086
      os._exit(0) # Exit parent (the first child) of the second child.
1087
  else:
1088
    os._exit(0) # Exit parent of the first child.
1089
  maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
1090
  if (maxfd == resource.RLIM_INFINITY):
1091
    maxfd = MAXFD
1092

    
1093
  # Iterate through and close all file descriptors.
1094
  for fd in range(0, maxfd):
1095
    if noclose_fds and fd in noclose_fds:
1096
      continue
1097
    try:
1098
      os.close(fd)
1099
    except OSError: # ERROR, fd wasn't open to begin with (ignored)
1100
      pass
1101
  os.open(logfile, os.O_RDWR|os.O_CREAT|os.O_APPEND, 0600)
1102
  # Duplicate standard input to standard output and standard error.
1103
  os.dup2(0, 1)     # standard output (1)
1104
  os.dup2(0, 2)     # standard error (2)
1105
  return 0
1106

    
1107

    
1108
def DaemonPidFileName(name):
  """Compute a ganeti pid file absolute path, given the daemon name.

  @param name: the daemon name
  @return: the path of the file "<name>.pid" inside
      constants.RUN_GANETI_DIR

  """
  pidfile = "%s.pid" % name
  return os.path.join(constants.RUN_GANETI_DIR, pidfile)
1113

    
1114

    
1115
def WritePidFile(name):
  """Write the current process pidfile.

  The file will be written to constants.RUN_GANETI_DIR/name.pid

  @param name: the daemon name, used to compute the pidfile path
  @raise errors.GenericError: if the pidfile already refers to a live
      process

  """
  pidfilename = DaemonPidFileName(name)

  recorded_pid = ReadPidFile(pidfilename)
  if IsProcessAlive(recorded_pid):
    raise errors.GenericError("%s contains a live process" % pidfilename)

  WriteFile(pidfilename, data="%d\n" % os.getpid())
1127

    
1128

    
1129
def RemovePidFile(name):
  """Remove the current process pidfile.

  The file removed is the one whose path DaemonPidFileName computes for
  the given name. Removal is best-effort: any errors are ignored.

  @param name: the daemon name, used to compute the pidfile path

  """
  pidfilename = DaemonPidFileName(name)
  # TODO: we could check here that the file contains our pid
  try:
    RemoveFile(pidfilename)
  except Exception:
    # Deliberately ignored. Unlike a bare "except:", this does not swallow
    # KeyboardInterrupt/SystemExit on interpreters where those are not
    # Exception subclasses.
    pass
1142

    
1143

    
1144
def KillProcess(pid, signal_=signal.SIGTERM, timeout=30):
  """Kill a process given by its pid.

  Nothing is done if the process is not alive. A process which exits on
  its own between the liveness check and the delivery of a signal is
  treated as successfully killed instead of raising an error.

  @type pid: int
  @param pid: The PID to terminate.
  @type signal_: int
  @param signal_: The signal to send, by default SIGTERM
  @type timeout: int
  @param timeout: The timeout after which, if the process is still alive,
                  a SIGKILL will be sent. If not positive, no such checking
                  will be done
  @raise errors.ProgrammerError: if pid is zero or negative

  """
  def _SendSignal(signum):
    """Sends signum to pid, tolerating an already vanished process.

    """
    try:
      os.kill(pid, signum)
    except OSError:
      # ESRCH: the process went away after the last liveness check
      if sys.exc_info()[1].errno != errno.ESRCH:
        raise

  if pid <= 0:
    # kill with pid=0 == suicide
    raise errors.ProgrammerError("Invalid pid given '%s'" % pid)

  if not IsProcessAlive(pid):
    return
  _SendSignal(signal_)
  if timeout <= 0:
    return

  # Poll until the process is gone or the timeout expires
  end = time.time() + timeout
  while time.time() < end and IsProcessAlive(pid):
    time.sleep(0.1)

  if IsProcessAlive(pid):
    # Still around after the grace period, use force
    _SendSignal(signal.SIGKILL)
1171

    
1172

    
1173
def FindFile(name, search_path, test=os.path.exists):
  """Look for a filesystem object in a given path.

  This is an abstract method to search for filesystem objects (files,
  dirs) under a given search path. The directories are tried in order
  and the first match wins.

  @param name: the name to look for
  @param search_path: list of directory names
  @param test: the test which the full path must satisfy
      (defaults to os.path.exists)
  @return: full path to the item if found, None otherwise

  """
  for directory in search_path:
    candidate = os.path.sep.join((directory, name))
    if not test(candidate):
      continue
    return candidate
  return None
1195

    
1196

    
1197
def CheckVolumeGroupSize(vglist, vgname, minsize):
  """Checks if the volume group list is valid.

  A non-None return value means there's an error, and the return value
  is the error message.

  @type vglist: dict
  @param vglist: dictionary mapping volume group names to their size
  @param vgname: the volume group which must be present
  @param minsize: the minimum size the volume group must have
  @rtype: None or str
  @return: None if the volume group exists and is big enough, otherwise
      the error description

  """
  vgsize = vglist.get(vgname)
  if vgsize is None:
    return "volume group '%s' missing" % vgname

  if vgsize < minsize:
    details = (vgname, minsize, vgsize)
    return ("volume group '%s' too small (%s MiB required, %d MiB found)" %
            details)

  return None
1211

    
1212

    
1213
def SplitTime(value):
  """Splits time as floating point number into a tuple.

  The value is rounded to the nearest microsecond. Plain truncation would
  lose a microsecond whenever the floating point product ends up just
  below the intended integer (e.g. 1.000001 * 1000000 evaluates to
  1000000.9999999999).

  @param value: Time in seconds
  @type value: int or float
  @return: Tuple containing (seconds, microseconds)

  """
  total_microseconds = int(round(value * 1000000))
  (seconds, microseconds) = divmod(total_microseconds, 1000000)

  assert 0 <= seconds, \
    "Seconds must be larger than or equal to 0, but are %s" % seconds
  assert 0 <= microseconds <= 999999, \
    "Microseconds must be 0-999999, but are %s" % microseconds

  return (int(seconds), int(microseconds))
1229

    
1230

    
1231
def MergeTime(timetuple):
  """Merges a tuple into time as a floating point number.

  @param timetuple: Time as tuple, (seconds, microseconds)
  @type timetuple: tuple
  @return: Time as a floating point number expressed in seconds

  """
  secs, usecs = timetuple

  assert 0 <= secs, \
    "Seconds must be larger than or equal to 0, but are %s" % secs
  assert 0 <= usecs <= 999999, \
    "Microseconds must be 0-999999, but are %s" % usecs

  fraction = float(usecs) * 0.000001
  return float(secs) + fraction
1247

    
1248

    
1249
def GetNodeDaemonPort():
  """Get the node daemon port for this cluster.

  Note that this routine does not read a ganeti-specific file, but
  instead uses socket.getservbyname to allow pre-customization of
  this parameter outside of Ganeti.

  @return: the port registered for the "ganeti-noded" tcp service, or
      constants.DEFAULT_NODED_PORT if the lookup fails

  """
  try:
    return socket.getservbyname("ganeti-noded", "tcp")
  except socket.error:
    return constants.DEFAULT_NODED_PORT
1263

    
1264

    
1265
def GetNodeDaemonPassword():
  """Get the node password for the cluster.

  @return: whatever ReadFile returns for constants.CLUSTER_PASSWORD_FILE

  """
  password_file = constants.CLUSTER_PASSWORD_FILE
  return ReadFile(password_file)
1270

    
1271

    
1272
def LockedMethod(fn):
  """Synchronized object access decorator.

  This decorator is intended to protect access to an object using the
  object's own lock which is hardcoded to '_lock'. The lock is acquired
  before the decorated method runs and is always released afterwards,
  even if the method raises an exception.

  The returned wrapper carries the name and the docstring of the
  decorated function, so that introspection and tracebacks stay useful.

  @param fn: the method to protect; its instance must provide '_lock'
  @return: the wrapped method

  """
  def _LockDebug(*args, **kwargs):
    # Lock tracing is only emitted if the module-level flag is set
    if debug_locks:
      logging.debug(*args, **kwargs)

  def wrapper(self, *args, **kwargs):
    assert hasattr(self, '_lock')
    lock = self._lock
    _LockDebug("Waiting for %s", lock)
    lock.acquire()
    try:
      _LockDebug("Acquired %s", lock)
      return fn(self, *args, **kwargs)
    finally:
      _LockDebug("Releasing %s", lock)
      lock.release()
      _LockDebug("Released %s", lock)

  # Manual equivalent of functools.wraps, which old interpreters lack
  wrapper.__name__ = getattr(fn, "__name__", wrapper.__name__)
  wrapper.__doc__ = getattr(fn, "__doc__", None)
  return wrapper
1297

    
1298

    
1299
def LockFile(fd):
  """Locks a file exclusively, without blocking.

  The lock is requested via fcntl.flock using LOCK_EX | LOCK_NB.

  @param fd: the file (anything accepted by fcntl.flock) to lock
  @raise errors.LockError: if the lock cannot be obtained right away
      (flock failing with EAGAIN); any other I/O error is passed on
      unchanged

  """
  try:
    fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
  except IOError:
    if sys.exc_info()[1].errno != errno.EAGAIN:
      raise
    raise errors.LockError("File already locked")
1309

    
1310

    
1311
class FileLock(object):
  """Utility class for file locks.

  The lock file is opened for writing when the object is created and
  stays open until Close() is called or the object is destroyed. The
  locking itself is done with fcntl.flock on that open file.

  """
  def __init__(self, filename):
    """Opens the lock file.

    @type filename: str
    @param filename: path of the file used for locking; it is opened in
        "w" mode

    """
    # Bound before open(), so that Close() (also invoked by __del__) works
    # on an instance whose file could not be opened
    self.fd = None
    self.filename = filename
    self.fd = open(self.filename, "w")

  def __del__(self):
    self.Close()

  def Close(self):
    """Closes the lock file; calling it more than once is harmless.

    """
    if self.fd:
      self.fd.close()
      self.fd = None

  def _flock(self, flag, blocking, timeout, errmsg):
    """Wrapper for fcntl.flock.

    @type flag: int
    @param flag: Operation flag
    @type blocking: bool
    @param blocking: Whether the operation should be done in blocking mode.
    @type timeout: None or float
    @param timeout: For how long the operation should be retried (implies
                    non-blocking mode).
    @type errmsg: string
    @param errmsg: Error message in case operation fails.
    @raise errors.LockError: if the operation cannot be completed without
        blocking (or before the timeout expires)

    """
    assert self.fd, "Lock was closed"
    assert timeout is None or timeout >= 0, \
      "If specified, timeout must not be negative"

    # Always bound, as the EAGAIN handling below reads it in every mode
    timeout_end = None

    if timeout is not None:
      flag |= fcntl.LOCK_NB
      timeout_end = time.time() + timeout

    # Blocking doesn't have effect with timeout
    elif not blocking:
      flag |= fcntl.LOCK_NB

    while True:
      try:
        fcntl.flock(self.fd, flag)
        return
      except IOError:
        if sys.exc_info()[1].errno != errno.EAGAIN:
          logging.exception("fcntl.flock failed")
          raise

        if timeout_end is None or time.time() >= timeout_end:
          # Not retrying at all, or out of time
          raise errors.LockError(errmsg)

        # Wait before trying again
        time.sleep(max(0.1, min(1.0, timeout)))

  def Exclusive(self, blocking=False, timeout=None):
    """Locks the file in exclusive mode.

    @type blocking: bool
    @param blocking: whether to wait for the lock (ignored if a timeout
        is given)
    @type timeout: None or float
    @param timeout: for how long to retry, in seconds
    @raise errors.LockError: if the lock cannot be obtained

    """
    self._flock(fcntl.LOCK_EX, blocking, timeout,
                "Failed to lock %s in exclusive mode" % self.filename)

  def Shared(self, blocking=False, timeout=None):
    """Locks the file in shared mode.

    @type blocking: bool
    @param blocking: whether to wait for the lock (ignored if a timeout
        is given)
    @type timeout: None or float
    @param timeout: for how long to retry, in seconds
    @raise errors.LockError: if the lock cannot be obtained

    """
    self._flock(fcntl.LOCK_SH, blocking, timeout,
                "Failed to lock %s in shared mode" % self.filename)

  def Unlock(self, blocking=True, timeout=None):
    """Unlocks the file.

    According to "man flock", unlocking can also be a nonblocking operation:
    "To make a non-blocking request, include LOCK_NB with any of the above
    operations"

    @type blocking: bool
    @param blocking: whether to do the operation in blocking mode (ignored
        if a timeout is given)
    @type timeout: None or float
    @param timeout: for how long to retry, in seconds
    @raise errors.LockError: if the file cannot be unlocked

    """
    self._flock(fcntl.LOCK_UN, blocking, timeout,
                "Failed to unlock %s" % self.filename)
1394

    
1395

    
1396
class SignalHandler(object):
1397
  """Generic signal handler class.
1398

1399
  It automatically restores the original handler when deconstructed or when
1400
  Reset() is called. You can either pass your own handler function in or query
1401
  the "called" attribute to detect whether the signal was sent.
1402

1403
  """
1404
  def __init__(self, signum):
1405
    """Constructs a new SignalHandler instance.
1406

1407
    @param signum: Single signal number or set of signal numbers
1408

1409
    """
1410
    if isinstance(signum, (int, long)):
1411
      self.signum = set([signum])
1412
    else:
1413
      self.signum = set(signum)
1414

    
1415
    self.called = False
1416

    
1417
    self._previous = {}
1418
    try:
1419
      for signum in self.signum:
1420
        # Setup handler
1421
        prev_handler = signal.signal(signum, self._HandleSignal)
1422
        try:
1423
          self._previous[signum] = prev_handler
1424
        except:
1425
          # Restore previous handler
1426
          signal.signal(signum, prev_handler)
1427
          raise
1428
    except:
1429
      # Reset all handlers
1430
      self.Reset()
1431
      # Here we have a race condition: a handler may have already been called,
1432
      # but there's not much we can do about it at this point.
1433
      raise
1434

    
1435
  def __del__(self):
1436
    self.Reset()
1437

    
1438
  def Reset(self):
1439
    """Restore previous handler.
1440

1441
    """
1442
    for signum, prev_handler in self._previous.items():
1443
      signal.signal(signum, prev_handler)
1444
      # If successful, remove from dict
1445
      del self._previous[signum]
1446

    
1447
  def Clear(self):
1448
    """Unsets "called" flag.
1449

1450
    This function can be used in case a signal may arrive several times.
1451

1452
    """
1453
    self.called = False
1454

    
1455
  def _HandleSignal(self, signum, frame):
1456
    """Actual signal handling function.
1457

1458
    """
1459
    # This is not nice and not absolutely atomic, but it appears to be the only
1460
    # solution in Python -- there are no atomic types.
1461
    self.called = True