code.grnet.gr Git - ganeti-local/blob - lib/utils/text.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21 """Utility functions for manipulating or working with text.
  22
  23 """
  24
  25
  26 import re
  27 import os
  28 import time
  29 import collections
  30
  31 from ganeti import errors
  32
  33
  34 #: Unit checker regexp
  35 _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")
  36
  37 #: Characters which don't need to be quoted for shell commands
  38 _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$")
  39
  40 #: MAC checker regexp
  41 _MAC_CHECK_RE = re.compile("^([0-9a-f]{2}:){5}[0-9a-f]{2}$", re.I)
  42
  43 #: Shell param checker regexp
  44 _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$")
  45
  46
  47 def MatchNameComponent(key, name_list, case_sensitive=True):
  48   """Try to match a name against a list.
  49
  50   This function will try to match a name like test1 against a list
  51   like C{['test1.example.com', 'test2.example.com', ...]}. Against
  52   this list, I{'test1'} as well as I{'test1.example'} will match, but
  53   not I{'test1.ex'}. A multiple match will be considered as no match
  54   at all (e.g. I{'test1'} against C{['test1.example.com',
  55   'test1.example.org']}), except when the key fully matches an entry
  56   (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).
  57
  58   @type key: str
  59   @param key: the name to be searched
  60   @type name_list: list
  61   @param name_list: the list of strings against which to search the key
  62   @type case_sensitive: boolean
  63   @param case_sensitive: whether to provide a case-sensitive match
  64
  65   @rtype: None or str
  66   @return: None if there is no match I{or} if there are multiple matches,
  67       otherwise the element from the list which matches
  68
  69   """
  70   if key in name_list:
  71     return key
  72
  73   re_flags = 0
  74   if not case_sensitive:
  75     re_flags |= re.IGNORECASE
  76     key = key.upper()
  77
  78   name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags)
  79
  80   names_filtered = []
  81   string_matches = []
  82   for name in name_list:
  83     if name_re.match(name) is not None:
  84       names_filtered.append(name)
  85       if not case_sensitive and key == name.upper():
  86         string_matches.append(name)
  87
  88   if len(string_matches) == 1:
  89     return string_matches[0]
  90   if len(names_filtered) == 1:
  91     return names_filtered[0]
  92
  93   return None
  94
  95
  96 def _DnsNameGlobHelper(match):
  97   """Helper function for L{DnsNameGlobPattern}.
  98
  99   Returns regular expression pattern for parts of the pattern.
 100
 101   """
 102   text = match.group(0)
 103
 104   if text == "*":
 105     return "[^.]*"
 106   elif text == "?":
 107     return "[^.]"
 108   else:
 109     return re.escape(text)
 110
 111
 112 def DnsNameGlobPattern(pattern):
 113   """Generates regular expression from DNS name globbing pattern.
 114
 115   A DNS name globbing pattern (e.g. C{*.site}) is converted to a regular
 116   expression. Escape sequences or ranges (e.g. [a-z]) are not supported.
 117
 118   Matching always starts at the leftmost part. An asterisk (*) matches all
 119   characters except the dot (.) separating DNS name parts. A question mark (?)
 120   matches a single character except the dot (.).
 121
 122   @type pattern: string
 123   @param pattern: DNS name globbing pattern
 124   @rtype: string
 125   @return: Regular expression
 126
 127   """
 128   return r"^%s(\..*)?$" % re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)
 129
 130
 131 def FormatUnit(value, units):
 132   """Formats an incoming number of MiB with the appropriate unit.
 133
 134   @type value: int
 135   @param value: integer representing the value in MiB (1048576)
 136   @type units: char
 137   @param units: the type of formatting we should do:
 138       - 'h' for automatic scaling
 139       - 'm' for MiBs
 140       - 'g' for GiBs
 141       - 't' for TiBs
 142   @rtype: str
 143   @return: the formatted value (with suffix)
 144
 145   """
 146   if units not in ("m", "g", "t", "h"):
 147     raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))
 148
 149   suffix = ""
 150
 151   if units == "m" or (units == "h" and value < 1024):
 152     if units == "h":
 153       suffix = "M"
 154     return "%d%s" % (round(value, 0), suffix)
 155
 156   elif units == "g" or (units == "h" and value < (1024 * 1024)):
 157     if units == "h":
 158       suffix = "G"
 159     return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)
 160
 161   else:
 162     if units == "h":
 163       suffix = "T"
 164     return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
 165
 166
 167 def ParseUnit(input_string):
 168   """Tries to extract number and scale from the given string.
 169
 170   Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
 171   [UNIT]}. If no unit is specified, it defaults to MiB. Return value
 172   is always an int in MiB.
 173
 174   """
 175   m = _PARSEUNIT_REGEX.match(str(input_string))
 176   if not m:
 177     raise errors.UnitParseError("Invalid format")
 178
 179   value = float(m.groups()[0])
 180
 181   unit = m.groups()[1]
 182   if unit:
 183     lcunit = unit.lower()
 184   else:
 185     lcunit = "m"
 186
 187   if lcunit in ("m", "mb", "mib"):
 188     # Value already in MiB
 189     pass
 190
 191   elif lcunit in ("g", "gb", "gib"):
 192     value *= 1024
 193
 194   elif lcunit in ("t", "tb", "tib"):
 195     value *= 1024 * 1024
 196
 197   else:
 198     raise errors.UnitParseError("Unknown unit: %s" % unit)
 199
 200   # Make sure we round up
 201   if int(value) < value:
 202     value += 1
 203
 204   # Round up to the next multiple of 4
 205   value = int(value)
 206   if value % 4:
 207     value += 4 - value % 4
 208
 209   return value
 210
 211
 212 def ShellQuote(value):
 213   """Quotes shell argument according to POSIX.
 214
 215   @type value: str
 216   @param value: the argument to be quoted
 217   @rtype: str
 218   @return: the quoted value
 219
 220   """
 221   if _SHELL_UNQUOTED_RE.match(value):
 222     return value
 223   else:
 224     return "'%s'" % value.replace("'", "'\\''")
 225
 226
 227 def ShellQuoteArgs(args):
 228   """Quotes a list of shell arguments.
 229
 230   @type args: list
 231   @param args: list of arguments to be quoted
 232   @rtype: str
 233   @return: the quoted arguments concatenated with spaces
 234
 235   """
 236   return " ".join([ShellQuote(i) for i in args])
 237
 238
 239 class ShellWriter:
 240   """Helper class to write scripts with indentation.
 241
 242   """
 243   INDENT_STR = "  "
 244
 245   def __init__(self, fh):
 246     """Initializes this class.
 247
 248     """
 249     self._fh = fh
 250     self._indent = 0
 251
 252   def IncIndent(self):
 253     """Increase indentation level by 1.
 254
 255     """
 256     self._indent += 1
 257
 258   def DecIndent(self):
 259     """Decrease indentation level by 1.
 260
 261     """
 262     assert self._indent > 0
 263     self._indent -= 1
 264
 265   def Write(self, txt, *args):
 266     """Write line to output file.
 267
 268     """
 269     assert self._indent >= 0
 270
 271     self._fh.write(self._indent * self.INDENT_STR)
 272
 273     if args:
 274       self._fh.write(txt % args)
 275     else:
 276       self._fh.write(txt)
 277
 278     self._fh.write("\n")
 279
 280
 281 def GenerateSecret(numbytes=20):
 282   """Generates a random secret.
 283
 284   This will generate a pseudo-random secret returning an hex string
 285   (so that it can be used where an ASCII string is needed).
 286
 287   @param numbytes: the number of bytes which will be represented by the returned
 288       string (defaulting to 20, the length of a SHA1 hash)
 289   @rtype: str
 290   @return: an hex representation of the pseudo-random sequence
 291
 292   """
 293   return os.urandom(numbytes).encode("hex")
 294
 295
 296 def NormalizeAndValidateMac(mac):
 297   """Normalizes and check if a MAC address is valid.
 298
 299   Checks whether the supplied MAC address is formally correct, only
 300   accepts colon separated format. Normalize it to all lower.
 301
 302   @type mac: str
 303   @param mac: the MAC to be validated
 304   @rtype: str
 305   @return: returns the normalized and validated MAC.
 306
 307   @raise errors.OpPrereqError: If the MAC isn't valid
 308
 309   """
 310   if not _MAC_CHECK_RE.match(mac):
 311     raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
 312                                errors.ECODE_INVAL)
 313
 314   return mac.lower()
 315
 316
 317 def SafeEncode(text):
 318   """Return a 'safe' version of a source string.
 319
 320   This function mangles the input string and returns a version that
 321   should be safe to display/encode as ASCII. To this end, we first
 322   convert it to ASCII using the 'backslashreplace' encoding which
 323   should get rid of any non-ASCII chars, and then we process it
 324   through a loop copied from the string repr sources in the python; we
 325   don't use string_escape anymore since that escape single quotes and
 326   backslashes too, and that is too much; and that escaping is not
 327   stable, i.e. string_escape(string_escape(x)) != string_escape(x).
 328
 329   @type text: str or unicode
 330   @param text: input data
 331   @rtype: str
 332   @return: a safe version of text
 333
 334   """
 335   if isinstance(text, unicode):
 336     # only if unicode; if str already, we handle it below
 337     text = text.encode("ascii", "backslashreplace")
 338   resu = ""
 339   for char in text:
 340     c = ord(char)
 341     if char == "\t":
 342       resu += r"\t"
 343     elif char == "\n":
 344       resu += r"\n"
 345     elif char == "\r":
 346       resu += r'\'r'
 347     elif c < 32 or c >= 127: # non-printable
 348       resu += "\\x%02x" % (c & 0xff)
 349     else:
 350       resu += char
 351   return resu
 352
 353
 354 def UnescapeAndSplit(text, sep=","):
 355   """Split and unescape a string based on a given separator.
 356
 357   This function splits a string based on a separator where the
 358   separator itself can be escape in order to be an element of the
 359   elements. The escaping rules are (assuming coma being the
 360   separator):
 361     - a plain , separates the elements
 362     - a sequence \\\\, (double backslash plus comma) is handled as a
 363       backslash plus a separator comma
 364     - a sequence \, (backslash plus comma) is handled as a
 365       non-separator comma
 366
 367   @type text: string
 368   @param text: the string to split
 369   @type sep: string
 370   @param text: the separator
 371   @rtype: string
 372   @return: a list of strings
 373
 374   """
 375   # we split the list by sep (with no escaping at this stage)
 376   slist = text.split(sep)
 377   # next, we revisit the elements and if any of them ended with an odd
 378   # number of backslashes, then we join it with the next
 379   rlist = []
 380   while slist:
 381     e1 = slist.pop(0)
 382     if e1.endswith("\\"):
 383       num_b = len(e1) - len(e1.rstrip("\\"))
 384       if num_b % 2 == 1 and slist:
 385         e2 = slist.pop(0)
 386         # Merge the two elements and push the result back to the source list for
 387         # revisiting. If e2 ended with backslashes, further merging may need to
 388         # be done.
 389         slist.insert(0, e1 + sep + e2)
 390         continue
 391     # here the backslashes remain (all), and will be reduced in the next step
 392     rlist.append(e1)
 393   # finally, replace backslash-something with something
 394   rlist = [re.sub(r"\\(.)", r"\1", v) for v in rlist]
 395   return rlist
 396
 397
 398 def CommaJoin(names):
 399   """Nicely join a set of identifiers.
 400
 401   @param names: set, list or tuple
 402   @return: a string with the formatted results
 403
 404   """
 405   return ", ".join([str(val) for val in names])
 406
 407
 408 def FormatTime(val):
 409   """Formats a time value.
 410
 411   @type val: float or None
 412   @param val: Timestamp as returned by time.time() (seconds since Epoch,
 413     1970-01-01 00:00:00 UTC)
 414   @return: a string value or N/A if we don't have a valid timestamp
 415
 416   """
 417   if val is None or not isinstance(val, (int, float)):
 418     return "N/A"
 419   # these two codes works on Linux, but they are not guaranteed on all
 420   # platforms
 421   return time.strftime("%F %T", time.localtime(val))
 422
 423
 424 def FormatSeconds(secs):
 425   """Formats seconds for easier reading.
 426
 427   @type secs: number
 428   @param secs: Number of seconds
 429   @rtype: string
 430   @return: Formatted seconds (e.g. "2d 9h 19m 49s")
 431
 432   """
 433   parts = []
 434
 435   secs = round(secs, 0)
 436
 437   if secs > 0:
 438     # Negative values would be a bit tricky
 439     for unit, one in [("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)]:
 440       (complete, secs) = divmod(secs, one)
 441       if complete or parts:
 442         parts.append("%d%s" % (complete, unit))
 443
 444   parts.append("%ds" % secs)
 445
 446   return " ".join(parts)
 447
 448
 449 class LineSplitter:
 450   """Splits data chunks into lines separated by newline.
 451
 452   Instances provide a file-like interface.
 453
 454   """
 455   def __init__(self, line_fn, *args):
 456     """Initializes this class.
 457
 458     @type line_fn: callable
 459     @param line_fn: Function called for each line, first parameter is line
 460     @param args: Extra arguments for L{line_fn}
 461
 462     """
 463     assert callable(line_fn)
 464
 465     if args:
 466       # Python 2.4 doesn't have functools.partial yet
 467       self._line_fn = \
 468         lambda line: line_fn(line, *args) # pylint: disable=W0142
 469     else:
 470       self._line_fn = line_fn
 471
 472     self._lines = collections.deque()
 473     self._buffer = ""
 474
 475   def write(self, data):
 476     parts = (self._buffer + data).split("\n")
 477     self._buffer = parts.pop()
 478     self._lines.extend(parts)
 479
 480   def flush(self):
 481     while self._lines:
 482       self._line_fn(self._lines.popleft().rstrip("\r\n"))
 483
 484   def close(self):
 485     self.flush()
 486     if self._buffer:
 487       self._line_fn(self._buffer)
 488
 489
 490 def IsValidShellParam(word):
 491   """Verifies is the given word is safe from the shell's p.o.v.
 492
 493   This means that we can pass this to a command via the shell and be
 494   sure that it doesn't alter the command line and is passed as such to
 495   the actual command.
 496
 497   Note that we are overly restrictive here, in order to be on the safe
 498   side.
 499
 500   @type word: str
 501   @param word: the word to check
 502   @rtype: boolean
 503   @return: True if the word is 'safe'
 504
 505   """
 506   return bool(_SHELLPARAM_REGEX.match(word))
 507
 508
 509 def BuildShellCmd(template, *args):
 510   """Build a safe shell command line from the given arguments.
 511
 512   This function will check all arguments in the args list so that they
 513   are valid shell parameters (i.e. they don't contain shell
 514   metacharacters). If everything is ok, it will return the result of
 515   template % args.
 516
 517   @type template: str
 518   @param template: the string holding the template for the
 519       string formatting
 520   @rtype: str
 521   @return: the expanded command line
 522
 523   """
 524   for word in args:
 525     if not IsValidShellParam(word):
 526       raise errors.ProgrammerError("Shell argument '%s' contains"
 527                                    " invalid characters" % word)
 528   return template % args
 529
 530
 531 def FormatOrdinal(value):
 532   """Formats a number as an ordinal in the English language.
 533
 534   E.g. the number 1 becomes "1st", 22 becomes "22nd".
 535
 536   @type value: integer
 537   @param value: Number
 538   @rtype: string
 539
 540   """
 541   tens = value % 10
 542
 543   if value > 10 and value < 20:
 544     suffix = "th"
 545   elif tens == 1:
 546     suffix = "st"
 547   elif tens == 2:
 548     suffix = "nd"
 549   elif tens == 3:
 550     suffix = "rd"
 551   else:
 552     suffix = "th"
 553
 554   return "%s%s" % (value, suffix)