code.grnet.gr Git - ganeti-local/blob - lib/utils/text.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21 """Utility functions for manipulating or working with text.
  22
  23 """
  24
  25
  26 import re
  27 import os
  28 import time
  29 import collections
  30
  31 from ganeti import errors
  32
  33
  34 #: Unit checker regexp
  35 _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")
  36
  37 #: Characters which don't need to be quoted for shell commands
  38 _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$")
  39
  40 #: Shell param checker regexp
  41 _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$")
  42
  43 #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026)
  44 _ASCII_ELLIPSIS = "..."
  45
  46 #: MAC address octet
  47 _MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}"
  48
  49
  50 def MatchNameComponent(key, name_list, case_sensitive=True):
  51   """Try to match a name against a list.
  52
  53   This function will try to match a name like test1 against a list
  54   like C{['test1.example.com', 'test2.example.com', ...]}. Against
  55   this list, I{'test1'} as well as I{'test1.example'} will match, but
  56   not I{'test1.ex'}. A multiple match will be considered as no match
  57   at all (e.g. I{'test1'} against C{['test1.example.com',
  58   'test1.example.org']}), except when the key fully matches an entry
  59   (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).
  60
  61   @type key: str
  62   @param key: the name to be searched
  63   @type name_list: list
  64   @param name_list: the list of strings against which to search the key
  65   @type case_sensitive: boolean
  66   @param case_sensitive: whether to provide a case-sensitive match
  67
  68   @rtype: None or str
  69   @return: None if there is no match I{or} if there are multiple matches,
  70       otherwise the element from the list which matches
  71
  72   """
  73   if key in name_list:
  74     return key
  75
  76   re_flags = 0
  77   if not case_sensitive:
  78     re_flags |= re.IGNORECASE
  79     key = key.upper()
  80
  81   name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags)
  82
  83   names_filtered = []
  84   string_matches = []
  85   for name in name_list:
  86     if name_re.match(name) is not None:
  87       names_filtered.append(name)
  88       if not case_sensitive and key == name.upper():
  89         string_matches.append(name)
  90
  91   if len(string_matches) == 1:
  92     return string_matches[0]
  93   if len(names_filtered) == 1:
  94     return names_filtered[0]
  95
  96   return None
  97
  98
  99 def _DnsNameGlobHelper(match):
 100   """Helper function for L{DnsNameGlobPattern}.
 101
 102   Returns regular expression pattern for parts of the pattern.
 103
 104   """
 105   text = match.group(0)
 106
 107   if text == "*":
 108     return "[^.]*"
 109   elif text == "?":
 110     return "[^.]"
 111   else:
 112     return re.escape(text)
 113
 114
 115 def DnsNameGlobPattern(pattern):
 116   """Generates regular expression from DNS name globbing pattern.
 117
 118   A DNS name globbing pattern (e.g. C{*.site}) is converted to a regular
 119   expression. Escape sequences or ranges (e.g. [a-z]) are not supported.
 120
 121   Matching always starts at the leftmost part. An asterisk (*) matches all
 122   characters except the dot (.) separating DNS name parts. A question mark (?)
 123   matches a single character except the dot (.).
 124
 125   @type pattern: string
 126   @param pattern: DNS name globbing pattern
 127   @rtype: string
 128   @return: Regular expression
 129
 130   """
 131   return r"^%s(\..*)?$" % re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)
 132
 133
 134 def FormatUnit(value, units):
 135   """Formats an incoming number of MiB with the appropriate unit.
 136
 137   @type value: int
 138   @param value: integer representing the value in MiB (1048576)
 139   @type units: char
 140   @param units: the type of formatting we should do:
 141       - 'h' for automatic scaling
 142       - 'm' for MiBs
 143       - 'g' for GiBs
 144       - 't' for TiBs
 145   @rtype: str
 146   @return: the formatted value (with suffix)
 147
 148   """
 149   if units not in ("m", "g", "t", "h"):
 150     raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))
 151
 152   suffix = ""
 153
 154   if units == "m" or (units == "h" and value < 1024):
 155     if units == "h":
 156       suffix = "M"
 157     return "%d%s" % (round(value, 0), suffix)
 158
 159   elif units == "g" or (units == "h" and value < (1024 * 1024)):
 160     if units == "h":
 161       suffix = "G"
 162     return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)
 163
 164   else:
 165     if units == "h":
 166       suffix = "T"
 167     return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
 168
 169
 170 def ParseUnit(input_string):
 171   """Tries to extract number and scale from the given string.
 172
 173   Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
 174   [UNIT]}. If no unit is specified, it defaults to MiB. Return value
 175   is always an int in MiB.
 176
 177   """
 178   m = _PARSEUNIT_REGEX.match(str(input_string))
 179   if not m:
 180     raise errors.UnitParseError("Invalid format")
 181
 182   value = float(m.groups()[0])
 183
 184   unit = m.groups()[1]
 185   if unit:
 186     lcunit = unit.lower()
 187   else:
 188     lcunit = "m"
 189
 190   if lcunit in ("m", "mb", "mib"):
 191     # Value already in MiB
 192     pass
 193
 194   elif lcunit in ("g", "gb", "gib"):
 195     value *= 1024
 196
 197   elif lcunit in ("t", "tb", "tib"):
 198     value *= 1024 * 1024
 199
 200   else:
 201     raise errors.UnitParseError("Unknown unit: %s" % unit)
 202
 203   # Make sure we round up
 204   if int(value) < value:
 205     value += 1
 206
 207   # Round up to the next multiple of 4
 208   value = int(value)
 209   if value % 4:
 210     value += 4 - value % 4
 211
 212   return value
 213
 214
 215 def ShellQuote(value):
 216   """Quotes shell argument according to POSIX.
 217
 218   @type value: str
 219   @param value: the argument to be quoted
 220   @rtype: str
 221   @return: the quoted value
 222
 223   """
 224   if _SHELL_UNQUOTED_RE.match(value):
 225     return value
 226   else:
 227     return "'%s'" % value.replace("'", "'\\''")
 228
 229
 230 def ShellQuoteArgs(args):
 231   """Quotes a list of shell arguments.
 232
 233   @type args: list
 234   @param args: list of arguments to be quoted
 235   @rtype: str
 236   @return: the quoted arguments concatenated with spaces
 237
 238   """
 239   return " ".join([ShellQuote(i) for i in args])
 240
 241
 242 class ShellWriter:
 243   """Helper class to write scripts with indentation.
 244
 245   """
 246   INDENT_STR = "  "
 247
 248   def __init__(self, fh, indent=True):
 249     """Initializes this class.
 250
 251     """
 252     self._fh = fh
 253     self._indent_enabled = indent
 254     self._indent = 0
 255
 256   def IncIndent(self):
 257     """Increase indentation level by 1.
 258
 259     """
 260     self._indent += 1
 261
 262   def DecIndent(self):
 263     """Decrease indentation level by 1.
 264
 265     """
 266     assert self._indent > 0
 267     self._indent -= 1
 268
 269   def Write(self, txt, *args):
 270     """Write line to output file.
 271
 272     """
 273     assert self._indent >= 0
 274
 275     if args:
 276       line = txt % args
 277     else:
 278       line = txt
 279
 280     if line and self._indent_enabled:
 281       # Indent only if there's something on the line
 282       self._fh.write(self._indent * self.INDENT_STR)
 283
 284     self._fh.write(line)
 285
 286     self._fh.write("\n")
 287
 288
 289 def GenerateSecret(numbytes=20):
 290   """Generates a random secret.
 291
 292   This will generate a pseudo-random secret returning an hex string
 293   (so that it can be used where an ASCII string is needed).
 294
 295   @param numbytes: the number of bytes which will be represented by the returned
 296       string (defaulting to 20, the length of a SHA1 hash)
 297   @rtype: str
 298   @return: an hex representation of the pseudo-random sequence
 299
 300   """
 301   return os.urandom(numbytes).encode("hex")
 302
 303
 304 def _MakeMacAddrRegexp(octets):
 305   """Builds a regular expression for verifying MAC addresses.
 306
 307   @type octets: integer
 308   @param octets: How many octets to expect (1-6)
 309   @return: Compiled regular expression
 310
 311   """
 312   assert octets > 0
 313   assert octets <= 6
 314
 315   return re.compile("^%s$" % ":".join([_MAC_ADDR_OCTET_RE] * octets),
 316                     re.I)
 317
 318
#: Regular expression for full MAC address (six colon-separated octets)
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)

#: Regular expression for half a MAC address (three colon-separated octets)
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)
 324
 325
 326 def _MacAddressCheck(check_re, mac, msg):
 327   """Checks a MAC address using a regular expression.
 328
 329   @param check_re: Compiled regular expression as returned by C{re.compile}
 330   @type mac: string
 331   @param mac: MAC address to be validated
 332   @type msg: string
 333   @param msg: Error message (%s will be replaced with MAC address)
 334
 335   """
 336   if check_re.match(mac):
 337     return mac.lower()
 338
 339   raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)
 340
 341
 342 def NormalizeAndValidateMac(mac):
 343   """Normalizes and check if a MAC address is valid and contains six octets.
 344
 345   Checks whether the supplied MAC address is formally correct. Accepts
 346   colon-separated format only. Normalize it to all lower case.
 347
 348   @type mac: string
 349   @param mac: MAC address to be validated
 350   @rtype: string
 351   @return: Normalized and validated MAC address
 352   @raise errors.OpPrereqError: If the MAC address isn't valid
 353
 354   """
 355   return _MacAddressCheck(_MAC_CHECK_RE, mac, "Invalid MAC address '%s'")
 356
 357
 358 def NormalizeAndValidateThreeOctetMacPrefix(mac):
 359   """Normalizes a potential MAC address prefix (three octets).
 360
 361   Checks whether the supplied string is a valid MAC address prefix consisting
 362   of three colon-separated octets. The result is normalized to all lower case.
 363
 364   @type mac: string
 365   @param mac: Prefix to be validated
 366   @rtype: string
 367   @return: Normalized and validated prefix
 368   @raise errors.OpPrereqError: If the MAC address prefix isn't valid
 369
 370   """
 371   return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac,
 372                           "Invalid MAC address prefix '%s'")
 373
 374
 375 def SafeEncode(text):
 376   """Return a 'safe' version of a source string.
 377
 378   This function mangles the input string and returns a version that
 379   should be safe to display/encode as ASCII. To this end, we first
 380   convert it to ASCII using the 'backslashreplace' encoding which
 381   should get rid of any non-ASCII chars, and then we process it
 382   through a loop copied from the string repr sources in the python; we
 383   don't use string_escape anymore since that escape single quotes and
 384   backslashes too, and that is too much; and that escaping is not
 385   stable, i.e. string_escape(string_escape(x)) != string_escape(x).
 386
 387   @type text: str or unicode
 388   @param text: input data
 389   @rtype: str
 390   @return: a safe version of text
 391
 392   """
 393   if isinstance(text, unicode):
 394     # only if unicode; if str already, we handle it below
 395     text = text.encode("ascii", "backslashreplace")
 396   resu = ""
 397   for char in text:
 398     c = ord(char)
 399     if char == "\t":
 400       resu += r"\t"
 401     elif char == "\n":
 402       resu += r"\n"
 403     elif char == "\r":
 404       resu += r'\'r'
 405     elif c < 32 or c >= 127: # non-printable
 406       resu += "\\x%02x" % (c & 0xff)
 407     else:
 408       resu += char
 409   return resu
 410
 411
 412 def UnescapeAndSplit(text, sep=","):
 413   """Split and unescape a string based on a given separator.
 414
 415   This function splits a string based on a separator where the
 416   separator itself can be escape in order to be an element of the
 417   elements. The escaping rules are (assuming coma being the
 418   separator):
 419     - a plain , separates the elements
 420     - a sequence \\\\, (double backslash plus comma) is handled as a
 421       backslash plus a separator comma
 422     - a sequence \, (backslash plus comma) is handled as a
 423       non-separator comma
 424
 425   @type text: string
 426   @param text: the string to split
 427   @type sep: string
 428   @param text: the separator
 429   @rtype: string
 430   @return: a list of strings
 431
 432   """
 433   # we split the list by sep (with no escaping at this stage)
 434   slist = text.split(sep)
 435   # next, we revisit the elements and if any of them ended with an odd
 436   # number of backslashes, then we join it with the next
 437   rlist = []
 438   while slist:
 439     e1 = slist.pop(0)
 440     if e1.endswith("\\"):
 441       num_b = len(e1) - len(e1.rstrip("\\"))
 442       if num_b % 2 == 1 and slist:
 443         e2 = slist.pop(0)
 444         # Merge the two elements and push the result back to the source list for
 445         # revisiting. If e2 ended with backslashes, further merging may need to
 446         # be done.
 447         slist.insert(0, e1 + sep + e2)
 448         continue
 449     # here the backslashes remain (all), and will be reduced in the next step
 450     rlist.append(e1)
 451   # finally, replace backslash-something with something
 452   rlist = [re.sub(r"\\(.)", r"\1", v) for v in rlist]
 453   return rlist
 454
 455
 456 def CommaJoin(names):
 457   """Nicely join a set of identifiers.
 458
 459   @param names: set, list or tuple
 460   @return: a string with the formatted results
 461
 462   """
 463   return ", ".join([str(val) for val in names])
 464
 465
 466 def FormatTime(val, usecs=None):
 467   """Formats a time value.
 468
 469   @type val: float or None
 470   @param val: Timestamp as returned by time.time() (seconds since Epoch,
 471     1970-01-01 00:00:00 UTC)
 472   @return: a string value or N/A if we don't have a valid timestamp
 473
 474   """
 475   if val is None or not isinstance(val, (int, float)):
 476     return "N/A"
 477
 478   # these two codes works on Linux, but they are not guaranteed on all
 479   # platforms
 480   result = time.strftime("%F %T", time.localtime(val))
 481
 482   if usecs is not None:
 483     result += ".%06d" % usecs
 484
 485   return result
 486
 487
 488 def FormatSeconds(secs):
 489   """Formats seconds for easier reading.
 490
 491   @type secs: number
 492   @param secs: Number of seconds
 493   @rtype: string
 494   @return: Formatted seconds (e.g. "2d 9h 19m 49s")
 495
 496   """
 497   parts = []
 498
 499   secs = round(secs, 0)
 500
 501   if secs > 0:
 502     # Negative values would be a bit tricky
 503     for unit, one in [("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)]:
 504       (complete, secs) = divmod(secs, one)
 505       if complete or parts:
 506         parts.append("%d%s" % (complete, unit))
 507
 508   parts.append("%ds" % secs)
 509
 510   return " ".join(parts)
 511
 512
 513 class LineSplitter:
 514   """Splits data chunks into lines separated by newline.
 515
 516   Instances provide a file-like interface.
 517
 518   """
 519   def __init__(self, line_fn, *args):
 520     """Initializes this class.
 521
 522     @type line_fn: callable
 523     @param line_fn: Function called for each line, first parameter is line
 524     @param args: Extra arguments for L{line_fn}
 525
 526     """
 527     assert callable(line_fn)
 528
 529     if args:
 530       # Python 2.4 doesn't have functools.partial yet
 531       self._line_fn = \
 532         lambda line: line_fn(line, *args) # pylint: disable=W0142
 533     else:
 534       self._line_fn = line_fn
 535
 536     self._lines = collections.deque()
 537     self._buffer = ""
 538
 539   def write(self, data):
 540     parts = (self._buffer + data).split("\n")
 541     self._buffer = parts.pop()
 542     self._lines.extend(parts)
 543
 544   def flush(self):
 545     while self._lines:
 546       self._line_fn(self._lines.popleft().rstrip("\r\n"))
 547
 548   def close(self):
 549     self.flush()
 550     if self._buffer:
 551       self._line_fn(self._buffer)
 552
 553
 554 def IsValidShellParam(word):
 555   """Verifies is the given word is safe from the shell's p.o.v.
 556
 557   This means that we can pass this to a command via the shell and be
 558   sure that it doesn't alter the command line and is passed as such to
 559   the actual command.
 560
 561   Note that we are overly restrictive here, in order to be on the safe
 562   side.
 563
 564   @type word: str
 565   @param word: the word to check
 566   @rtype: boolean
 567   @return: True if the word is 'safe'
 568
 569   """
 570   return bool(_SHELLPARAM_REGEX.match(word))
 571
 572
 573 def BuildShellCmd(template, *args):
 574   """Build a safe shell command line from the given arguments.
 575
 576   This function will check all arguments in the args list so that they
 577   are valid shell parameters (i.e. they don't contain shell
 578   metacharacters). If everything is ok, it will return the result of
 579   template % args.
 580
 581   @type template: str
 582   @param template: the string holding the template for the
 583       string formatting
 584   @rtype: str
 585   @return: the expanded command line
 586
 587   """
 588   for word in args:
 589     if not IsValidShellParam(word):
 590       raise errors.ProgrammerError("Shell argument '%s' contains"
 591                                    " invalid characters" % word)
 592   return template % args
 593
 594
 595 def FormatOrdinal(value):
 596   """Formats a number as an ordinal in the English language.
 597
 598   E.g. the number 1 becomes "1st", 22 becomes "22nd".
 599
 600   @type value: integer
 601   @param value: Number
 602   @rtype: string
 603
 604   """
 605   tens = value % 10
 606
 607   if value > 10 and value < 20:
 608     suffix = "th"
 609   elif tens == 1:
 610     suffix = "st"
 611   elif tens == 2:
 612     suffix = "nd"
 613   elif tens == 3:
 614     suffix = "rd"
 615   else:
 616     suffix = "th"
 617
 618   return "%s%s" % (value, suffix)
 619
 620
 621 def Truncate(text, length):
 622   """Truncate string and add ellipsis if needed.
 623
 624   @type text: string
 625   @param text: Text
 626   @type length: integer
 627   @param length: Desired length
 628   @rtype: string
 629   @return: Truncated text
 630
 631   """
 632   assert length > len(_ASCII_ELLIPSIS)
 633
 634   # Serialize if necessary
 635   if not isinstance(text, basestring):
 636     text = str(text)
 637
 638   if len(text) <= length:
 639     return text
 640   else:
 641     return text[:length - len(_ASCII_ELLIPSIS)] + _ASCII_ELLIPSIS
 642
 643
 644 def FilterEmptyLinesAndComments(text):
 645   """Filters empty lines and comments from a line-based string.
 646
 647   Whitespace is also removed from the beginning and end of all lines.
 648
 649   @type text: string
 650   @param text: Input string
 651   @rtype: list
 652
 653   """
 654   return [line for line in map(lambda s: s.strip(), text.splitlines())
 655           # Ignore empty lines and comments
 656           if line and not line.startswith("#")]
 657
 658
 659 def FormatKeyValue(data):
 660   """Formats a dictionary as "key=value" parameters.
 661
 662   The keys are sorted to have a stable order.
 663
 664   @type data: dict
 665   @rtype: list of string
 666
 667   """
 668   return ["%s=%s" % (key, value) for (key, value) in sorted(data.items())]