Statistics
| Branch: | Tag: | Revision:

root / lib / utils.py @ 4ca1b175

History | View | Annotate | Download (20.4 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Ganeti small utilities
23
"""
24

    
25

    
26
import sys
27
import os
28
import sha
29
import time
30
import subprocess
31
import re
32
import socket
33
import tempfile
34
import shutil
35
import errno
36

    
37
from ganeti import logger
38
from ganeti import errors
39

    
40
_locksheld = []
41
_re_shell_unquoted = re.compile('^[-.,=:/_+@A-Za-z0-9]+$')
42

    
43
class RunResult(object):
  """Value object describing the outcome of an external program run.

  Instance variables:
    cmd: the command that was run, exactly as given to the constructor
    exit_code: the exit code of the program, or None (if the program
               didn't exit())
    signal: numeric signal that caused the program to finish, or None
            (if the program wasn't terminated by a signal)
    stdout: the standard output of the program
    stderr: the standard error of the program
    failed: a Boolean value which is True in case the program was
            terminated by a signal or exited with a non-zero exit code
    fail_reason: a string detailing the termination reason
    output: read-only property, stdout followed by stderr

  """
  __slots__ = ["exit_code", "signal", "stdout", "stderr",
               "failed", "fail_reason", "cmd"]

  def __init__(self, exit_code, signal, stdout, stderr, cmd):
    self.cmd = cmd
    self.exit_code = exit_code
    self.signal = signal
    self.stdout = stdout
    self.stderr = stderr

    killed = signal is not None
    self.failed = killed or exit_code != 0

    # A signal takes precedence over the exit code when describing the
    # termination reason.
    if killed:
      reason = "terminated by signal %s" % self.signal
    elif exit_code is not None:
      reason = "exited with exit code %s" % self.exit_code
    else:
      reason = "unable to determine termination reason"
    self.fail_reason = reason

  def _GetOutput(self):
    """Concatenate stdout and stderr, in that order.

    """
    return self.stdout + self.stderr

  output = property(fget=_GetOutput, doc="Return full output")
84

    
85

    
86
def _GetLockFile(subsystem):
87
  """Compute the file name for a given lock name."""
88
  return "/var/lock/ganeti_lock_%s" % subsystem
89

    
90

    
91
def Lock(name, max_retries=None, debug=False):
  """Lock a given subsystem.

  In case the lock is already held by an alive process, the function
  will sleep indefinitely and poll with a one second interval.

  When the optional integer argument 'max_retries' is passed with a
  non-zero value, the function will sleep only for this number of
  times, and then it will raise a LockError if the lock can't be
  acquired. Passing in a negative number will cause only one try to
  get the lock. Passing a positive number will make the function retry
  for approximately that number of seconds.

  Args:
    name: name of the subsystem to lock
    max_retries: maximum number of one-second waits, see above
    debug: if true, the "waiting for lock" notice is printed even when
           stdin is not a terminal

  Raises:
    errors.LockError: if the lock is already held by this process, the
      lock file can't be created or read, it contains an invalid PID,
      it belongs to a dead process, or the retries are exhausted

  """
  lockfile = _GetLockFile(name)

  if name in _locksheld:
    raise errors.LockError('Lock "%s" already held!' % (name,))

  errcount = 0

  retries = 0
  while True:
    try:
      # O_EXCL makes the creation atomic: exactly one process succeeds
      fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR | os.O_SYNC)
      break
    except OSError as creat_err:
      if creat_err.errno != errno.EEXIST:
        raise errors.LockError("Can't create the lock file. Error '%s'." %
                               str(creat_err))

    # The lock file exists already, find out who owns it
    try:
      pf = open(lockfile, 'r')
    except IOError as open_err:
      errcount += 1
      if errcount >= 5:
        raise errors.LockError("Lock file exists but cannot be opened."
                               " Error: '%s'." % str(open_err))
      time.sleep(1)
      continue

    # Always close the file again: this loop can run for a long time and
    # would otherwise leak one file descriptor per iteration.
    try:
      try:
        pid = int(pf.read())
      except ValueError:
        raise errors.LockError("Invalid pid string in %s" %
                               (lockfile,))
    finally:
      pf.close()

    if not IsProcessAlive(pid):
      raise errors.LockError("Stale lockfile %s for pid %d?" %
                             (lockfile, pid))

    if max_retries and max_retries <= retries:
      raise errors.LockError("Can't acquire lock during the specified"
                             " time, aborting.")
    if retries == 5 and (debug or sys.stdin.isatty()):
      logger.ToStderr("Waiting for '%s' lock from pid %d..." % (name, pid))

    time.sleep(1)
    retries += 1

  # We own the (still empty) lock file, record our PID in it
  try:
    os.write(fd, ('%d\n' % (os.getpid(),)).encode("ascii"))
  finally:
    os.close(fd)

  _locksheld.append(name)
156

    
157

    
158
def Unlock(name):
  """Unlock a given subsystem.

  The lock file is only removed if it contains the PID of the calling
  process.

  Args:
    name: name of the subsystem to unlock

  Raises:
    errors.LockError: if the lock file can't be opened, doesn't contain
      a valid PID or belongs to another process

  """
  lockfile = _GetLockFile(name)

  try:
    fd = os.open(lockfile, os.O_RDONLY)
  except OSError:
    raise errors.LockError('Lock "%s" not held.' % (name,))

  f = os.fdopen(fd, 'r')
  try:
    pid_str = f.read()
  finally:
    # Closing the file object also releases the underlying descriptor
    f.close()

  try:
    pid = int(pid_str)
  except ValueError:
    raise errors.LockError('Unable to determine PID of locking process.')

  if pid != os.getpid():
    raise errors.LockError('Lock not held by me (%d != %d)' %
                           (os.getpid(), pid,))

  os.unlink(lockfile)
  _locksheld.remove(name)
183

    
184

    
185
def LockCleanup():
  """Remove all locks held by this process.

  """
  # Unlock() removes the name from _locksheld, so iterate over a copy;
  # iterating the live list would skip every other lock.
  for lock in list(_locksheld):
    Unlock(lock)
191

    
192

    
193
def RunCmd(cmd):
  """Execute a (shell) command.

  The command should not read from its standard input, as it will be
  closed. The command is run with LC_ALL=C in its environment.

  Args:
    cmd: command to run; either a string, which is run through the
         shell, or a list of arguments, which is executed directly
         (each element is converted to a string first)

  Returns: `RunResult` instance

  """
  if isinstance(cmd, list):
    cmd = [str(val) for val in cmd]
    strcmd = " ".join(cmd)
    shell = False
  else:
    strcmd = cmd
    shell = True
  env = os.environ.copy()
  env["LC_ALL"] = "C"
  child = subprocess.Popen(cmd, shell=shell,
                           stderr=subprocess.PIPE,
                           stdout=subprocess.PIPE,
                           stdin=subprocess.PIPE,
                           close_fds=True, env=env)

  # communicate() closes the child's stdin (no input is given) and drains
  # stdout and stderr concurrently. Reading one pipe to EOF before the
  # other can deadlock once the child fills the pipe nobody is reading.
  out, err = child.communicate()

  status = child.returncode
  if status >= 0:
    exitcode = status
    signal = None
  else:
    # subprocess reports "killed by signal N" as a return code of -N
    exitcode = None
    signal = -status

  return RunResult(exitcode, signal, out, err, strcmd)
233

    
234

    
235
def RunCmdUnlocked(cmd):
  """Execute a shell command without the 'cmd' lock.

  This variant of `RunCmd()` drops the 'cmd' lock before running the
  command and re-acquires it afterwards, thus it can be used to call
  other ganeti commands.

  The argument and return values are the same as for the `RunCmd()`
  function.

  Args:
    cmd - command to run. (str)

  Returns:
    `RunResult`

  """
  Unlock('cmd')
  try:
    ret = RunCmd(cmd)
  finally:
    # Take the lock back even if running the command raised, so that
    # the caller's view of the lock state stays correct.
    Lock('cmd')

  return ret
257

    
258

    
259
def RemoveFile(filename):
  """Delete a file, tolerating a few harmless failures.

  A path that doesn't exist or that is a directory is silently
  ignored; every other error is propagated to the caller.

  """
  ignorable = (errno.ENOENT, errno.EISDIR)
  try:
    os.unlink(filename)
  except OSError as err:
    if err.errno in ignorable:
      return
    raise
271

    
272

    
273
def _FingerprintFile(filename):
274
  """Compute the fingerprint of a file.
275

276
  If the file does not exist, a None will be returned
277
  instead.
278

279
  Args:
280
    filename - Filename (str)
281

282
  """
283
  if not (os.path.exists(filename) and os.path.isfile(filename)):
284
    return None
285

    
286
  f = open(filename)
287

    
288
  fp = sha.sha()
289
  while True:
290
    data = f.read(4096)
291
    if not data:
292
      break
293

    
294
    fp.update(data)
295

    
296
  return fp.hexdigest()
297

    
298

    
299
def FingerprintFiles(files):
  """Fingerprint every file in a list.

  Args:
    files - array of filenames.  ( [str, ...] )

  Return value:
    dictionary mapping filename to fingerprint; files for which no
    fingerprint could be computed are left out

  """
  digests = [(name, _FingerprintFile(name)) for name in files]
  return dict([(name, digest) for (name, digest) in digests if digest])
317

    
318

    
319
def CheckDict(target, template, logname=None):
  """Fill in keys missing from a dictionary.

  Every key of `template` that is absent from `target` is copied over,
  together with the template's value; keys already present in `target`
  are left untouched. `target` is modified in place.

  Args:
    target   - the dictionary to check
    template - template dictionary
    logname  - a caller-chosen string to identify the debug log
               entry; if None, no logging will be done

  Returns value:
    None

  """
  absent = [key for key in template if key not in target]
  for key in absent:
    target[key] = template[key]

  if absent and logname:
    logger.Debug('%s missing keys %s' %
                 (logname, ', '.join(absent)))
345

    
346

    
347
def IsProcessAlive(pid):
  """Check if a given pid exists on the system.

  Args:
    pid: the (integer) process ID to look for

  Returns: true or false, depending on if the pid exists or not

  Remarks: zombie processes treated as not alive

  Raises:
    IOError: if /proc/<pid>/status can't be opened for a reason other
      than the process not existing

  """
  try:
    f = open("/proc/%d/status" % pid)
  except IOError as err:
    if err.errno in (errno.ENOENT, errno.ENOTDIR):
      return False
    # Anything else is unexpected; report it instead of continuing with
    # an unopened file.
    raise

  try:
    # Look the "State:" line up by name: its position in the file is not
    # the same on all kernels.
    for line in f:
      if line.startswith("State:"):
        fields = line.split()
        if len(fields) > 1 and fields[1] == "Z":
          return False
        break
  finally:
    f.close()

  return True
372

    
373

    
374
def MatchNameComponent(key, name_list):
  """Find the single list entry a (possibly shortened) name refers to.

  The key matches an entry if it is equal to it or if it is a prefix of
  it that ends at a dot. E.g. for the list ['test1.example.com',
  'test2.example.com', ...], both 'test1' and 'test1.example' match the
  first entry, while 'test1.ex' matches nothing. An ambiguous key
  (e.g. 'test1' against ['test1.example.com', 'test1.example.org'])
  counts as no match at all.

  Args:
    key: the name to be searched
    name_list: the list of strings against which to search the key

  Returns:
    None if there is no match *or* if there are multiple matches
    otherwise the element from the list which matches

  """
  pattern = re.compile(r"^%s(\..*)?$" % re.escape(key))
  hits = [candidate for candidate in name_list if pattern.match(candidate)]
  if len(hits) == 1:
    return hits[0]
  return None
397

    
398

    
399
def LookupHostname(hostname):
  """Look up hostname

  Args:
    hostname: hostname to look up, can be also be a non FQDN

  Returns:
    None if the name can't be resolved, otherwise a dictionary with
    keys:
    - ip: IP addr (the first one returned by the resolver)
    - hostname_full: hostname fully qualified
    - hostname: hostname fully qualified (historic artifact)

  """
  try:
    (fqdn, dummy, ipaddrs) = socket.gethostbyname_ex(hostname)
    ipaddr = ipaddrs[0]
  except (socket.gaierror, socket.herror):
    # hostname not found in DNS; depending on the resolver path the
    # failure is reported as either of the two exception types
    return None

  returnhostname = {
    "ip": ipaddr,
    "hostname_full": fqdn,
    "hostname": fqdn,
    }

  return returnhostname
426

    
427

    
428
def ListVolumeGroups():
  """Collect the volume groups known to 'vgs' together with their size.

  Lines of the command output which can't be parsed are logged as
  errors and skipped; if the command itself fails the result is empty.

  Returns:
     Dictionary with keys volume name and values the size of the volume
     (as an integer, in the unit requested from vgs via '--units m')

  """
  vgs = RunCmd("vgs --noheadings --units m --nosuffix -o name,size")
  sizes = {}

  if not vgs.failed:
    for entry in vgs.stdout.splitlines():
      try:
        vg_name, vg_size = entry.split()
        vg_size = int(float(vg_size))
      except (IndexError, ValueError) as err:
        logger.Error("Invalid output from vgs (%s): %s" % (err, entry))
        continue

      sizes[vg_name] = vg_size

  return sizes
452

    
453

    
454
def BridgeExists(bridge):
  """Tell whether a network bridge of the given name is present.

  The check is done by looking for the bridge's directory under
  /sys/class/net.

  Returns:
     True if it does, false otherwise.

  """
  sysfs_dir = "/sys/class/net/%s/bridge" % bridge
  return os.path.isdir(sysfs_dir)
462

    
463

    
464
def NiceSort(name_list):
  """Sort a list of strings based on digit and non-digit groupings.

  Given a list of names ['a1', 'a10', 'a11', 'a2'] this function will
  sort the list in the logical order ['a1', 'a2', 'a10', 'a11'].

  The sort algorithm breaks each name in groups of either only-digits
  or no-digits. Only the first eight such groups are considered, and
  after that we just use what's left of the string (names whose first
  eight groups are equal are ordered by plain string comparison).

  Where groups of different kinds meet at the same position, a missing
  group sorts first, then numbers, then text. Names which can't be
  split into groups at all (e.g. the empty string) sort first.

  Return value
    - a copy of the list sorted according to our algorithm

  """
  group = r"(\D+|\d+)"
  # One mandatory group followed by up to seven optional ones
  splitter = re.compile("^" + group + (group + "?") * 7 + ".*$")
  starts_with_digit = re.compile(r"\d").match

  def _GroupKey(grp):
    """Map a group to a key that is comparable across group kinds."""
    # The leading rank spells out the None < number < text ordering, so
    # the comparison never has to compare values of different types.
    if grp is None:
      return (0,)
    if starts_with_digit(grp):
      return (1, int(grp))
    return (2, grp)

  def _NameKey(name):
    """Build the complete sort key of a name."""
    m = splitter.match(name)
    if m is None:
      groups = ()
    else:
      groups = tuple([_GroupKey(grp) for grp in m.groups()])
    return (groups, name)

  return sorted(name_list, key=_NameKey)
496

    
497

    
498
def CheckDaemonAlive(pid_file, process_string):
  """Check whether the specified daemon is alive.

  Args:
   - pid_file: file to read the daemon pid from, the file is
               expected to contain only a single line containing
               only the PID
   - process_string: a substring that we expect to find in
                     the command line of the daemon process

  Returns:
   - True if the daemon is judged to be alive (that is:
      - the PID file exists, is readable and contains a number
      - a process of the specified PID is running
      - that process contains the specified string in its
        command line
      - the process is not in state Z (zombie))
   - False otherwise

  """
  try:
    pid_fh = open(pid_file, 'r')
    try:
      pid = int(pid_fh.readline())
    finally:
      pid_fh.close()

    cmdline_file = open("/proc/%s/cmdline" % (pid), 'r')
    try:
      # read() rather than readline(): the arguments are separated by
      # NUL bytes and may themselves contain newlines
      cmdline = cmdline_file.read()
    finally:
      cmdline_file.close()

    if process_string not in cmdline:
      return False

    stat_file = open("/proc/%s/stat" % (pid), 'r')
    try:
      stat_line = stat_file.readline()
    finally:
      stat_file.close()

    # The second field is the executable name in parentheses and may
    # contain spaces, so the fields can't be counted from the start of
    # the line; the state is the first field after the last ')'.
    process_state = stat_line[stat_line.rindex(")") + 1:].split()[0]

    if process_state == 'Z':
      return False

  except (IndexError, IOError, ValueError):
    return False

  return True
549

    
550

    
551
def TryConvert(fn, val):
  """Apply a conversion function, falling back to the input value.

  The result of `fn(val)` is returned, unless the call raises a
  ValueError or a TypeError, in which case `val` itself is returned
  unchanged. Any other exception is passed on to the caller.

  """
  try:
    return fn(val)
  except (ValueError, TypeError):
    return val
565

    
566

    
567
def IsValidIP(ip):
  """Verifies the syntax of an IP address.

  This function checks if the ip address passed is valid or not based
  on syntax (not ip range, class calculations or anything): four
  dot-separated groups of one to three digits without leading zeros.

  Returns:
    a match object (which is true) if the syntax is valid, else None

  """
  unit = r"(0|[1-9]\d{0,2})"
  # \Z instead of $: the latter would also accept a trailing newline
  return re.match(r"^%s\.%s\.%s\.%s\Z" % (unit, unit, unit, unit), ip)
576

    
577

    
578
def IsValidShellParam(word):
  """Verifies is the given word is safe from the shell's p.o.v.

  This means that we can pass this to a command via the shell and be
  sure that it doesn't alter the command line and is passed as such to
  the actual command.

  Note that we are overly restrictive here, in order to be on the safe
  side.

  Returns:
    True if the word is non-empty and contains safe characters only,
    False otherwise

  """
  # The pattern is anchored with \Z rather than $, because $ also
  # matches before a trailing newline and a newline terminates the
  # command as far as the shell is concerned.
  return bool(re.match(r"^[-a-zA-Z0-9._+/:%@]+\Z", word))
590

    
591

    
592
def BuildShellCmd(template, *args):
  """Fill a shell command template with validated arguments.

  Each of the given arguments must be a valid shell parameter (i.e.
  contain no shell metacharacters); the first one which isn't causes a
  ProgrammerError. If all of them are fine, the result of
  template % args is returned.

  """
  for arg in args:
    if IsValidShellParam(arg):
      continue
    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % arg)

  return template % args
606

    
607

    
608
def FormatUnit(value):
  """Render a size given in MiB using the most fitting unit.

  Sizes below 1024 MiB are shown as whole mebibytes ("M"), sizes below
  1024 GiB as gibibytes ("G") and everything else as tebibytes ("T"),
  the latter two with one decimal place.

  Value needs to be passed as a numeric type. Return value is always a string.

  """
  if value < 1024:
    return "%dM" % round(value, 0)

  if value < (1024 * 1024):
    gib = float(value) / 1024
    return "%0.1fG" % round(gib, 1)

  tib = float(value) / 1024 / 1024
  return "%0.1fT" % round(tib, 1)
622

    
623

    
624
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format NUMBER+ [DOT NUMBER+] SPACE* [UNIT]. If no unit
  is specified, it defaults to MiB. Return value is always an int in MiB,
  rounded up to the next multiple of 4.

  Raises:
    errors.UnitParseError: if the string doesn't have the expected
      format, the number can't be parsed or the unit is unknown

  """
  m = re.match(r'^([.\d]+)\s*([a-zA-Z]+)?$', input_string)
  if not m:
    raise errors.UnitParseError("Invalid format")

  number, unit = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The pattern lets strings such as "1.2.3" or "." through; report
    # them as a parse error too instead of leaking a bare ValueError.
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = 'm'

  if lcunit in ('m', 'mb', 'mib'):
    # Value already in MiB
    pass

  elif lcunit in ('g', 'gb', 'gib'):
    value *= 1024

  elif lcunit in ('t', 'tb', 'tib'):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
666

    
667

    
668
def AddAuthorizedKey(file_name, key):
  """Adds an SSH public key to an authorized_keys file.

  The key is appended on a line of its own, unless the file already
  contains it (differences in whitespace are ignored). The file is
  created if it doesn't exist.

  Args:
    file_name: Path to authorized_keys file
    key: String containing key
  """
  key_fields = key.split()

  f = open(file_name, 'a+')
  try:
    # Where reading starts in a file opened for appending depends on the
    # platform (it may be the end of the file), so rewind explicitly;
    # otherwise the check for an existing key can be skipped altogether.
    f.seek(0)

    nl = True
    for line in f:
      # Ignore whitespace changes
      if line.split() == key_fields:
        break
      nl = line.endswith('\n')
    else:
      # Key not found; terminate the last line first if need be
      if not nl:
        f.write("\n")
      f.write(key.rstrip('\r\n'))
      f.write("\n")
      f.flush()
  finally:
    f.close()
693

    
694

    
695
def RemoveAuthorizedKey(file_name, key):
  """Removes an SSH public key from an authorized_keys file.

  All lines matching the key (differences in whitespace are ignored)
  are dropped. The new contents are written to a temporary file in the
  same directory, which then replaces the original file; if anything
  goes wrong the original is left untouched and the temporary file is
  removed again.

  Args:
    file_name: Path to authorized_keys file
    key: String containing key
  """
  key_fields = key.split()

  fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name))
  replaced = False
  try:
    out = os.fdopen(fd, 'w')
    try:
      f = open(file_name, 'r')
      try:
        for line in f:
          # Ignore whitespace changes while comparing lines
          if line.split() != key_fields:
            out.write(line)
      finally:
        f.close()
    finally:
      out.close()

    # The temporary file is complete and closed, put it in place
    os.rename(tmpname, file_name)
    replaced = True
  finally:
    if not replaced:
      # Don't leave the temporary file behind on failure
      try:
        os.unlink(tmpname)
      except OSError:
        pass
720

    
721

    
722
def CreateBackup(file_name):
  """Copy a file to a timestamped backup next to it.

  The backup is named after the original file, with '.backup-' and the
  current time (in whole seconds) appended.

  Returns: the path to the newly created backup file.

  """
  if os.path.isfile(file_name):
    # Warning: the following code contains a race condition when we create
    # more than one backup of the same file in a second.
    suffix = '.backup-%d' % int(time.time())
    backup_name = file_name + suffix
    shutil.copyfile(file_name, backup_name)
    return backup_name

  raise errors.ProgrammerError("Can't make a backup of a non-file '%s'" %
                               file_name)
737

    
738

    
739
def ShellQuote(value):
  """Quote a single shell argument according to POSIX.

  Values made up of known-safe characters only are returned as they
  are; anything else is wrapped in single quotes, with embedded single
  quotes escaped.

  """
  if not _re_shell_unquoted.match(value):
    escaped = value.replace("'", "'\\''")
    return "'%s'" % escaped

  return value
747

    
748

    
749
def ShellQuoteArgs(args):
  """Quote every argument for the shell and join them with spaces.

  """
  quoted = map(ShellQuote, args)
  return ' '.join(quoted)
754

    
755

    
756
def _ParseIpOutput(output):
757
  """Parsing code for GetLocalIPAddresses().
758

759
  This function is split out, so we can unit test it.
760

761
  """
762
  re_ip = re.compile('^(\d+\.\d+\.\d+\.\d+)(?:/\d+)$')
763

    
764
  ips = []
765
  for line in output.splitlines(False):
766
    fields = line.split()
767
    if len(line) < 4:
768
      continue
769
    m = re_ip.match(fields[3])
770
    if m:
771
      ips.append(m.group(1))
772

    
773
  return ips
774

    
775

    
776
def GetLocalIPAddresses():
  """Return the list of all local IP addresses, as reported by 'ip'.

  Should this break one day, a small Python module written in C could
  use the API call getifaddrs().

  An OpExecError is raised if the 'ip' command fails.

  """
  ip_cmd = ["ip", "-family", "inet", "-oneline", "addr", "show"]
  result = RunCmd(ip_cmd)

  if not result.failed:
    return _ParseIpOutput(result.output)

  raise errors.OpExecError("Command '%s' failed, error: %s,"
    " output: %s" % (result.cmd, result.fail_reason, result.output))
789

    
790

    
791
def TcpPing(source, target, port, timeout=10, live_port_needed=True):
  """Simple ping implementation using TCP connect(2).

  Try to do a TCP connect(2) from the specified source IP to the specified
  target IP and the specified target port. If live_port_needed is set to true,
  requires the remote end to accept the connection. The timeout is specified
  in seconds and defaults to 10 seconds

  Args:
    source: local IP address to connect from
    target: IP address to connect to
    port: TCP port to connect to
    timeout: timeout of the connection attempt, in seconds
    live_port_needed: if false, a refused connection counts as success too

  Returns:
    True if the ping succeeded, False otherwise (which includes the
    source address not being available on this machine)

  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

  try:
    try:
      sock.bind((source, 0))
    except socket.error as err:
      if err.args[0] == errno.EADDRNOTAVAIL:
        # We can't ping from an address we don't have
        return False
      # Other bind failures have always been ignored here; that
      # best-effort behaviour is kept and the connect is still attempted.

    sock.settimeout(timeout)

    try:
      sock.connect((target, port))
    except socket.timeout:
      return False
    except socket.error as err:
      # A refused connection still proves the target is reachable
      return (not live_port_needed) and (err.args[0] == errno.ECONNREFUSED)

    return True
  finally:
    # Release the socket on every path
    sock.close()