code.grnet.gr Git - ganeti-local/blob - lib/utils.py

   1 #!/usr/bin/python
   2 #
   3
   4 # Copyright (C) 2006, 2007 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Ganeti small utilities
  23 """
  24
  25
  26 import sys
  27 import os
  28 import sha
  29 import time
  30 import subprocess
  31 import re
  32 import socket
  33 import tempfile
  34 import shutil
  35 from errno import ENOENT, ENOTDIR, EISDIR, EEXIST
  36
  37 from ganeti import logger
  38 from ganeti import errors
  39
  40 _locksheld = []
  41 _re_shell_unquoted = re.compile('^[-.,=:/_+@A-Za-z0-9]+$')
  42
  43 class RunResult(object):
  44   """Simple class for holding the result of running external programs.
  45
  46   Instance variables:
  47     exit_code: the exit code of the program, or None (if the program
  48                didn't exit())
  49     signal: numeric signal that caused the program to finish, or None
  50             (if the program wasn't terminated by a signal)
  51     stdout: the standard output of the program
  52     stderr: the standard error of the program
  53     failed: a Boolean value which is True in case the program was
  54             terminated by a signal or exited with a non-zero exit code
  55     fail_reason: a string detailing the termination reason
  56
  57   """
  58   __slots__ = ["exit_code", "signal", "stdout", "stderr",
  59                "failed", "fail_reason", "cmd"]
  60
  61
  62   def __init__(self, exit_code, signal, stdout, stderr, cmd):
  63     self.cmd = cmd
  64     self.exit_code = exit_code
  65     self.signal = signal
  66     self.stdout = stdout
  67     self.stderr = stderr
  68     self.failed = (signal is not None or exit_code != 0)
  69
  70     if self.signal is not None:
  71       self.fail_reason = "terminated by signal %s" % self.signal
  72     elif self.exit_code is not None:
  73       self.fail_reason = "exited with exit code %s" % self.exit_code
  74     else:
  75       self.fail_reason = "unable to determine termination reason"
  76
  77   def _GetOutput(self):
  78     """Returns the combined stdout and stderr for easier usage.
  79
  80     """
  81     return self.stdout + self.stderr
  82
  83   output = property(_GetOutput, None, None, "Return full output")
  84
  85
  86 def _GetLockFile(subsystem):
  87   """Compute the file name for a given lock name."""
  88   return "/var/lock/ganeti_lock_%s" % subsystem
  89
  90
  91 def Lock(name, max_retries=None, debug=False):
  92   """Lock a given subsystem.
  93
  94   In case the lock is already held by an alive process, the function
  95   will sleep indefintely and poll with a one second interval.
  96
  97   When the optional integer argument 'max_retries' is passed with a
  98   non-zero value, the function will sleep only for this number of
  99   times, and then it will will raise a LockError if the lock can't be
 100   acquired. Passing in a negative number will cause only one try to
 101   get the lock. Passing a positive number will make the function retry
 102   for approximately that number of seconds.
 103
 104   """
 105   lockfile = _GetLockFile(name)
 106
 107   if name in _locksheld:
 108     raise errors.LockError('Lock "%s" already held!' % (name,))
 109
 110   errcount = 0
 111
 112   retries = 0
 113   while True:
 114     try:
 115       fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR | os.O_SYNC)
 116       break
 117     except OSError, creat_err:
 118       if creat_err.errno != EEXIST:
 119         raise errors.LockError("Can't create the lock file. Error '%s'." %
 120                                str(creat_err))
 121
 122       try:
 123         pf = open(lockfile, 'r')
 124       except IOError, open_err:
 125         errcount += 1
 126         if errcount >= 5:
 127           raise errors.LockError("Lock file exists but cannot be opened."
 128                                  " Error: '%s'." % str(open_err))
 129         time.sleep(1)
 130         continue
 131
 132       try:
 133         pid = int(pf.read())
 134       except ValueError:
 135         raise errors.LockError("Invalid pid string in %s" %
 136                                (lockfile,))
 137
 138       if not IsProcessAlive(pid):
 139         raise errors.LockError("Stale lockfile %s for pid %d?" %
 140                                (lockfile, pid))
 141
 142       if max_retries and max_retries <= retries:
 143         raise errors.LockError("Can't acquire lock during the specified"
 144                                " time, aborting.")
 145       if retries == 5 and (debug or sys.stdin.isatty()):
 146         logger.ToStderr("Waiting for '%s' lock from pid %d..." % (name, pid))
 147
 148       time.sleep(1)
 149       retries += 1
 150       continue
 151
 152   os.write(fd, '%d\n' % (os.getpid(),))
 153   os.close(fd)
 154
 155   _locksheld.append(name)
 156
 157
 158 def Unlock(name):
 159   """Unlock a given subsystem.
 160
 161   """
 162   lockfile = _GetLockFile(name)
 163
 164   try:
 165     fd = os.open(lockfile, os.O_RDONLY)
 166   except OSError:
 167     raise errors.LockError('Lock "%s" not held.' % (name,))
 168
 169   f = os.fdopen(fd, 'r')
 170   pid_str = f.read()
 171
 172   try:
 173     pid = int(pid_str)
 174   except ValueError:
 175     raise errors.LockError('Unable to determine PID of locking process.')
 176
 177   if pid != os.getpid():
 178     raise errors.LockError('Lock not held by me (%d != %d)' %
 179                            (os.getpid(), pid,))
 180
 181   os.unlink(lockfile)
 182   _locksheld.remove(name)
 183
 184
 185 def LockCleanup():
 186   """Remove all locks.
 187
 188   """
 189   for lock in _locksheld:
 190     Unlock(lock)
 191
 192
 193 def RunCmd(cmd):
 194   """Execute a (shell) command.
 195
 196   The command should not read from its standard input, as it will be
 197   closed.
 198
 199   Args:
 200     cmd: command to run. (str)
 201
 202   Returns: `RunResult` instance
 203
 204   """
 205   if isinstance(cmd, list):
 206     cmd = [str(val) for val in cmd]
 207     strcmd = " ".join(cmd)
 208     shell = False
 209   else:
 210     strcmd = cmd
 211     shell = True
 212   child = subprocess.Popen(cmd, shell=shell,
 213                            stderr=subprocess.PIPE,
 214                            stdout=subprocess.PIPE,
 215                            stdin=subprocess.PIPE,
 216                            close_fds=True)
 217
 218   child.stdin.close()
 219   out = child.stdout.read()
 220   err = child.stderr.read()
 221
 222   status = child.wait()
 223   if status >= 0:
 224     exitcode = status
 225     signal = None
 226   else:
 227     exitcode = None
 228     signal = -status
 229
 230   return RunResult(exitcode, signal, out, err, strcmd)
 231
 232
 233 def RunCmdUnlocked(cmd):
 234   """Execute a shell command without the 'cmd' lock.
 235
 236   This variant of `RunCmd()` drops the 'cmd' lock before running the
 237   command and re-aquires it afterwards, thus it can be used to call
 238   other ganeti commands.
 239
 240   The argument and return values are the same as for the `RunCmd()`
 241   function.
 242
 243   Args:
 244     cmd - command to run. (str)
 245
 246   Returns:
 247     `RunResult`
 248
 249   """
 250   Unlock('cmd')
 251   ret = RunCmd(cmd)
 252   Lock('cmd')
 253
 254   return ret
 255
 256
 257 def RemoveFile(filename):
 258   """Remove a file ignoring some errors.
 259
 260   Remove a file, ignoring non-existing ones or directories. Other
 261   errors are passed.
 262
 263   """
 264   try:
 265     os.unlink(filename)
 266   except OSError, err:
 267     if err.errno not in (ENOENT, EISDIR):
 268       raise
 269
 270
 271 def _FingerprintFile(filename):
 272   """Compute the fingerprint of a file.
 273
 274   If the file does not exist, a None will be returned
 275   instead.
 276
 277   Args:
 278     filename - Filename (str)
 279
 280   """
 281   if not (os.path.exists(filename) and os.path.isfile(filename)):
 282     return None
 283
 284   f = open(filename)
 285
 286   fp = sha.sha()
 287   while True:
 288     data = f.read(4096)
 289     if not data:
 290       break
 291
 292     fp.update(data)
 293
 294   return fp.hexdigest()
 295
 296
 297 def FingerprintFiles(files):
 298   """Compute fingerprints for a list of files.
 299
 300   Args:
 301     files - array of filenames.  ( [str, ...] )
 302
 303   Return value:
 304     dictionary of filename: fingerprint for the files that exist
 305
 306   """
 307   ret = {}
 308
 309   for filename in files:
 310     cksum = _FingerprintFile(filename)
 311     if cksum:
 312       ret[filename] = cksum
 313
 314   return ret
 315
 316
 317 def CheckDict(target, template, logname=None):
 318   """Ensure a dictionary has a required set of keys.
 319
 320   For the given dictionaries `target` and `template`, ensure target
 321   has all the keys from template. Missing keys are added with values
 322   from template.
 323
 324   Args:
 325     target   - the dictionary to check
 326     template - template dictionary
 327     logname  - a caller-chosen string to identify the debug log
 328                entry; if None, no logging will be done
 329
 330   Returns value:
 331     None
 332
 333   """
 334   missing = []
 335   for k in template:
 336     if k not in target:
 337       missing.append(k)
 338       target[k] = template[k]
 339
 340   if missing and logname:
 341     logger.Debug('%s missing keys %s' %
 342                  (logname, ', '.join(missing)))
 343
 344
 345 def IsProcessAlive(pid):
 346   """Check if a given pid exists on the system.
 347
 348   Returns: true or false, depending on if the pid exists or not
 349
 350   Remarks: zombie processes treated as not alive
 351
 352   """
 353   try:
 354     f = open("/proc/%d/status" % pid)
 355   except IOError, err:
 356     if err.errno in (ENOENT, ENOTDIR):
 357       return False
 358
 359   alive = True
 360   try:
 361     data = f.readlines()
 362     if len(data) > 1:
 363       state = data[1].split()
 364       if len(state) > 1 and state[1] == "Z":
 365         alive = False
 366   finally:
 367     f.close()
 368
 369   return alive
 370
 371
 372 def MatchNameComponent(key, name_list):
 373   """Try to match a name against a list.
 374
 375   This function will try to match a name like test1 against a list
 376   like ['test1.example.com', 'test2.example.com', ...]. Against this
 377   list, 'test1' as well as 'test1.example' will match, but not
 378   'test1.ex'. A multiple match will be considered as no match at all
 379   (e.g. 'test1' against ['test1.example.com', 'test1.example.org']).
 380
 381   Args:
 382     key: the name to be searched
 383     name_list: the list of strings against which to search the key
 384
 385   Returns:
 386     None if there is no match *or* if there are multiple matches
 387     otherwise the element from the list which matches
 388
 389   """
 390   mo = re.compile("^%s(\..*)?$" % re.escape(key))
 391   names_filtered = [name for name in name_list if mo.match(name) is not None]
 392   if len(names_filtered) != 1:
 393     return None
 394   return names_filtered[0]
 395
 396
 397 def LookupHostname(hostname):
 398   """Look up hostname
 399
 400   Args:
 401     hostname: hostname to look up, can be also be a non FQDN
 402
 403   Returns:
 404     Dictionary with keys:
 405     - ip: IP addr
 406     - hostname_full: hostname fully qualified
 407     - hostname: hostname fully qualified (historic artifact)
 408
 409   """
 410   try:
 411     (fqdn, dummy, ipaddrs) = socket.gethostbyname_ex(hostname)
 412     ipaddr = ipaddrs[0]
 413   except socket.gaierror:
 414     # hostname not found in DNS
 415     return None
 416
 417   returnhostname = {
 418     "ip": ipaddr,
 419     "hostname_full": fqdn,
 420     "hostname": fqdn,
 421     }
 422
 423   return returnhostname
 424
 425
 426 def ListVolumeGroups():
 427   """List volume groups and their size
 428
 429   Returns:
 430      Dictionary with keys volume name and values the size of the volume
 431
 432   """
 433   command = "vgs --noheadings --units m --nosuffix -o name,size"
 434   result = RunCmd(command)
 435   retval = {}
 436   if result.failed:
 437     return retval
 438
 439   for line in result.stdout.splitlines():
 440     try:
 441       name, size = line.split()
 442       size = int(float(size))
 443     except (IndexError, ValueError), err:
 444       logger.Error("Invalid output from vgs (%s): %s" % (err, line))
 445       continue
 446
 447     retval[name] = size
 448
 449   return retval
 450
 451
 452 def BridgeExists(bridge):
 453   """Check whether the given bridge exists in the system
 454
 455   Returns:
 456      True if it does, false otherwise.
 457
 458   """
 459   return os.path.isdir("/sys/class/net/%s/bridge" % bridge)
 460
 461
 462 def NiceSort(name_list):
 463   """Sort a list of strings based on digit and non-digit groupings.
 464
 465   Given a list of names ['a1', 'a10', 'a11', 'a2'] this function will
 466   sort the list in the logical order ['a1', 'a2', 'a10', 'a11'].
 467
 468   The sort algorithm breaks each name in groups of either only-digits
 469   or no-digits. Only the first eight such groups are considered, and
 470   after that we just use what's left of the string.
 471
 472   Return value
 473     - a copy of the list sorted according to our algorithm
 474
 475   """
 476   _SORTER_BASE = "(\D+|\d+)"
 477   _SORTER_FULL = "^%s%s?%s?%s?%s?%s?%s?%s?.*$" % (_SORTER_BASE, _SORTER_BASE,
 478                                                   _SORTER_BASE, _SORTER_BASE,
 479                                                   _SORTER_BASE, _SORTER_BASE,
 480                                                   _SORTER_BASE, _SORTER_BASE)
 481   _SORTER_RE = re.compile(_SORTER_FULL)
 482   _SORTER_NODIGIT = re.compile("^\D*$")
 483   def _TryInt(val):
 484     """Attempts to convert a variable to integer."""
 485     if val is None or _SORTER_NODIGIT.match(val):
 486       return val
 487     rval = int(val)
 488     return rval
 489
 490   to_sort = [([_TryInt(grp) for grp in _SORTER_RE.match(name).groups()], name)
 491              for name in name_list]
 492   to_sort.sort()
 493   return [tup[1] for tup in to_sort]
 494
 495
 496 def CheckDaemonAlive(pid_file, process_string):
 497   """Check wether the specified daemon is alive.
 498
 499   Args:
 500    - pid_file: file to read the daemon pid from, the file is
 501                expected to contain only a single line containing
 502                only the PID
 503    - process_string: a substring that we expect to find in
 504                      the command line of the daemon process
 505
 506   Returns:
 507    - True if the daemon is judged to be alive (that is:
 508       - the PID file exists, is readable and contains a number
 509       - a process of the specified PID is running
 510       - that process contains the specified string in its
 511         command line
 512       - the process is not in state Z (zombie))
 513    - False otherwise
 514
 515   """
 516   try:
 517     pid_file = file(pid_file, 'r')
 518     try:
 519       pid = int(pid_file.readline())
 520     finally:
 521       pid_file.close()
 522
 523     cmdline_file_path = "/proc/%s/cmdline" % (pid)
 524     cmdline_file = open(cmdline_file_path, 'r')
 525     try:
 526       cmdline = cmdline_file.readline()
 527     finally:
 528       cmdline_file.close()
 529
 530     if not process_string in cmdline:
 531       return False
 532
 533     stat_file_path =  "/proc/%s/stat" % (pid)
 534     stat_file = open(stat_file_path, 'r')
 535     try:
 536       process_state = stat_file.readline().split()[2]
 537     finally:
 538       stat_file.close()
 539
 540     if process_state == 'Z':
 541       return False
 542
 543   except (IndexError, IOError, ValueError):
 544     return False
 545
 546   return True
 547
 548
 549 def TryConvert(fn, val):
 550   """Try to convert a value ignoring errors.
 551
 552   This function tries to apply function `fn` to `val`. If no
 553   ValueError or TypeError exceptions are raised, it will return the
 554   result, else it will return the original value. Any other exceptions
 555   are propagated to the caller.
 556
 557   """
 558   try:
 559     nv = fn(val)
 560   except (ValueError, TypeError), err:
 561     nv = val
 562   return nv
 563
 564
 565 def IsValidIP(ip):
 566   """Verifies the syntax of an IP address.
 567
 568   This function checks if the ip address passes is valid or not based
 569   on syntax (not ip range, class calculations or anything).
 570
 571   """
 572   unit = "(0|[1-9]\d{0,2})"
 573   return re.match("^%s\.%s\.%s\.%s$" % (unit, unit, unit, unit), ip)
 574
 575
 576 def IsValidShellParam(word):
 577   """Verifies is the given word is safe from the shell's p.o.v.
 578
 579   This means that we can pass this to a command via the shell and be
 580   sure that it doesn't alter the command line and is passed as such to
 581   the actual command.
 582
 583   Note that we are overly restrictive here, in order to be on the safe
 584   side.
 585
 586   """
 587   return bool(re.match("^[-a-zA-Z0-9._+/:%@]+$", word))
 588
 589
 590 def BuildShellCmd(template, *args):
 591   """Build a safe shell command line from the given arguments.
 592
 593   This function will check all arguments in the args list so that they
 594   are valid shell parameters (i.e. they don't contain shell
 595   metacharaters). If everything is ok, it will return the result of
 596   template % args.
 597
 598   """
 599   for word in args:
 600     if not IsValidShellParam(word):
 601       raise errors.ProgrammerError("Shell argument '%s' contains"
 602                                    " invalid characters" % word)
 603   return template % args
 604
 605
 606 def FormatUnit(value):
 607   """Formats an incoming number of MiB with the appropriate unit.
 608
 609   Value needs to be passed as a numeric type. Return value is always a string.
 610
 611   """
 612   if value < 1024:
 613     return "%dM" % round(value, 0)
 614
 615   elif value < (1024 * 1024):
 616     return "%0.1fG" % round(float(value) / 1024, 1)
 617
 618   else:
 619     return "%0.1fT" % round(float(value) / 1024 / 1024, 1)
 620
 621
 622 def ParseUnit(input_string):
 623   """Tries to extract number and scale from the given string.
 624
 625   Input must be in the format NUMBER+ [DOT NUMBER+] SPACE* [UNIT]. If no unit
 626   is specified, it defaults to MiB. Return value is always an int in MiB.
 627
 628   """
 629   m = re.match('^([.\d]+)\s*([a-zA-Z]+)?$', input_string)
 630   if not m:
 631     raise errors.UnitParseError("Invalid format")
 632
 633   value = float(m.groups()[0])
 634
 635   unit = m.groups()[1]
 636   if unit:
 637     lcunit = unit.lower()
 638   else:
 639     lcunit = 'm'
 640
 641   if lcunit in ('m', 'mb', 'mib'):
 642     # Value already in MiB
 643     pass
 644
 645   elif lcunit in ('g', 'gb', 'gib'):
 646     value *= 1024
 647
 648   elif lcunit in ('t', 'tb', 'tib'):
 649     value *= 1024 * 1024
 650
 651   else:
 652     raise errors.UnitParseError("Unknown unit: %s" % unit)
 653
 654   # Make sure we round up
 655   if int(value) < value:
 656     value += 1
 657
 658   # Round up to the next multiple of 4
 659   value = int(value)
 660   if value % 4:
 661     value += 4 - value % 4
 662
 663   return value
 664
 665
 666 def AddAuthorizedKey(file_name, key):
 667   """Adds an SSH public key to an authorized_keys file.
 668
 669   Args:
 670     file_name: Path to authorized_keys file
 671     key: String containing key
 672   """
 673   key_fields = key.split()
 674
 675   f = open(file_name, 'a+')
 676   try:
 677     nl = True
 678     for line in f:
 679       # Ignore whitespace changes
 680       if line.split() == key_fields:
 681         break
 682       nl = line.endswith('\n')
 683     else:
 684       if not nl:
 685         f.write("\n")
 686       f.write(key.rstrip('\r\n'))
 687       f.write("\n")
 688       f.flush()
 689   finally:
 690     f.close()
 691
 692
 693 def RemoveAuthorizedKey(file_name, key):
 694   """Removes an SSH public key from an authorized_keys file.
 695
 696   Args:
 697     file_name: Path to authorized_keys file
 698     key: String containing key
 699   """
 700   key_fields = key.split()
 701
 702   fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
 703   out = os.fdopen(fd, 'w')
 704   try:
 705     f = open(file_name, 'r')
 706     try:
 707       for line in f:
 708         # Ignore whitespace changes while comparing lines
 709         if line.split() != key_fields:
 710           out.write(line)
 711
 712       out.flush()
 713       os.rename(tmpname, file_name)
 714     finally:
 715       f.close()
 716   finally:
 717     out.close()
 718
 719
 720 def CreateBackup(file_name):
 721   """Creates a backup of a file.
 722
 723   Returns: the path to the newly created backup file.
 724
 725   """
 726   if not os.path.isfile(file_name):
 727     raise errors.ProgrammerError("Can't make a backup of a non-file '%s'" %
 728                                 file_name)
 729
 730   # Warning: the following code contains a race condition when we create more
 731   # than one backup of the same file in a second.
 732   backup_name = file_name + '.backup-%d' % int(time.time())
 733   shutil.copyfile(file_name, backup_name)
 734   return backup_name
 735
 736
 737 def ShellQuote(value):
 738   """Quotes shell argument according to POSIX.
 739
 740   """
 741   if _re_shell_unquoted.match(value):
 742     return value
 743   else:
 744     return "'%s'" % value.replace("'", "'\\''")
 745
 746
 747 def ShellQuoteArgs(args):
 748   """Quotes all given shell arguments and concatenates using spaces.
 749
 750   """
 751   return ' '.join([ShellQuote(i) for i in args])
 752
 753
 754 def _ParseIpOutput(output):
 755   """Parsing code for GetLocalIPAddresses().
 756
 757   This function is split out, so we can unit test it.
 758
 759   """
 760   re_ip = re.compile('^(\d+\.\d+\.\d+\.\d+)(?:/\d+)$')
 761
 762   ips = []
 763   for line in output.splitlines(False):
 764     fields = line.split()
 765     if len(line) < 4:
 766       continue
 767     m = re_ip.match(fields[3])
 768     if m:
 769       ips.append(m.group(1))
 770
 771   return ips
 772
 773
 774 def GetLocalIPAddresses():
 775   """Gets a list of all local IP addresses.
 776
 777   Should this break one day, a small Python module written in C could
 778   use the API call getifaddrs().
 779
 780   """
 781   result = RunCmd(["ip", "-family", "inet", "-oneline", "addr", "show"])
 782   if result.failed:
 783     raise errors.OpExecError("Command '%s' failed, error: %s,"
 784       " output: %s" % (result.cmd, result.fail_reason, result.output))
 785
 786   return _ParseIpOutput(result.output)