code.grnet.gr Git - ganeti-local/blob - lib/utils/text.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21 """Utility functions for manipulating or working with text.
  22
  23 """
  24
  25
  26 import re
  27 import os
  28 import time
  29 import collections
  30
  31 from ganeti import errors
  32
  33
  34 #: Unit checker regexp
  35 _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")
  36
  37 #: Characters which don't need to be quoted for shell commands
  38 _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$")
  39
  40 #: MAC checker regexp
  41 _MAC_CHECK_RE = re.compile("^([0-9a-f]{2}:){5}[0-9a-f]{2}$", re.I)
  42
  43 #: Shell param checker regexp
  44 _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$")
  45
  46 #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026)
  47 _ASCII_ELLIPSIS = "..."
  48
  49
  50 def MatchNameComponent(key, name_list, case_sensitive=True):
  51   """Try to match a name against a list.
  52
  53   This function will try to match a name like test1 against a list
  54   like C{['test1.example.com', 'test2.example.com', ...]}. Against
  55   this list, I{'test1'} as well as I{'test1.example'} will match, but
  56   not I{'test1.ex'}. A multiple match will be considered as no match
  57   at all (e.g. I{'test1'} against C{['test1.example.com',
  58   'test1.example.org']}), except when the key fully matches an entry
  59   (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).
  60
  61   @type key: str
  62   @param key: the name to be searched
  63   @type name_list: list
  64   @param name_list: the list of strings against which to search the key
  65   @type case_sensitive: boolean
  66   @param case_sensitive: whether to provide a case-sensitive match
  67
  68   @rtype: None or str
  69   @return: None if there is no match I{or} if there are multiple matches,
  70       otherwise the element from the list which matches
  71
  72   """
  73   if key in name_list:
  74     return key
  75
  76   re_flags = 0
  77   if not case_sensitive:
  78     re_flags |= re.IGNORECASE
  79     key = key.upper()
  80
  81   name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags)
  82
  83   names_filtered = []
  84   string_matches = []
  85   for name in name_list:
  86     if name_re.match(name) is not None:
  87       names_filtered.append(name)
  88       if not case_sensitive and key == name.upper():
  89         string_matches.append(name)
  90
  91   if len(string_matches) == 1:
  92     return string_matches[0]
  93   if len(names_filtered) == 1:
  94     return names_filtered[0]
  95
  96   return None
  97
  98
  99 def _DnsNameGlobHelper(match):
 100   """Helper function for L{DnsNameGlobPattern}.
 101
 102   Returns regular expression pattern for parts of the pattern.
 103
 104   """
 105   text = match.group(0)
 106
 107   if text == "*":
 108     return "[^.]*"
 109   elif text == "?":
 110     return "[^.]"
 111   else:
 112     return re.escape(text)
 113
 114
 115 def DnsNameGlobPattern(pattern):
 116   """Generates regular expression from DNS name globbing pattern.
 117
 118   A DNS name globbing pattern (e.g. C{*.site}) is converted to a regular
 119   expression. Escape sequences or ranges (e.g. [a-z]) are not supported.
 120
 121   Matching always starts at the leftmost part. An asterisk (*) matches all
 122   characters except the dot (.) separating DNS name parts. A question mark (?)
 123   matches a single character except the dot (.).
 124
 125   @type pattern: string
 126   @param pattern: DNS name globbing pattern
 127   @rtype: string
 128   @return: Regular expression
 129
 130   """
 131   return r"^%s(\..*)?$" % re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)
 132
 133
 134 def FormatUnit(value, units):
 135   """Formats an incoming number of MiB with the appropriate unit.
 136
 137   @type value: int
 138   @param value: integer representing the value in MiB (1048576)
 139   @type units: char
 140   @param units: the type of formatting we should do:
 141       - 'h' for automatic scaling
 142       - 'm' for MiBs
 143       - 'g' for GiBs
 144       - 't' for TiBs
 145   @rtype: str
 146   @return: the formatted value (with suffix)
 147
 148   """
 149   if units not in ("m", "g", "t", "h"):
 150     raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))
 151
 152   suffix = ""
 153
 154   if units == "m" or (units == "h" and value < 1024):
 155     if units == "h":
 156       suffix = "M"
 157     return "%d%s" % (round(value, 0), suffix)
 158
 159   elif units == "g" or (units == "h" and value < (1024 * 1024)):
 160     if units == "h":
 161       suffix = "G"
 162     return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)
 163
 164   else:
 165     if units == "h":
 166       suffix = "T"
 167     return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
 168
 169
 170 def ParseUnit(input_string):
 171   """Tries to extract number and scale from the given string.
 172
 173   Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
 174   [UNIT]}. If no unit is specified, it defaults to MiB. Return value
 175   is always an int in MiB.
 176
 177   """
 178   m = _PARSEUNIT_REGEX.match(str(input_string))
 179   if not m:
 180     raise errors.UnitParseError("Invalid format")
 181
 182   value = float(m.groups()[0])
 183
 184   unit = m.groups()[1]
 185   if unit:
 186     lcunit = unit.lower()
 187   else:
 188     lcunit = "m"
 189
 190   if lcunit in ("m", "mb", "mib"):
 191     # Value already in MiB
 192     pass
 193
 194   elif lcunit in ("g", "gb", "gib"):
 195     value *= 1024
 196
 197   elif lcunit in ("t", "tb", "tib"):
 198     value *= 1024 * 1024
 199
 200   else:
 201     raise errors.UnitParseError("Unknown unit: %s" % unit)
 202
 203   # Make sure we round up
 204   if int(value) < value:
 205     value += 1
 206
 207   # Round up to the next multiple of 4
 208   value = int(value)
 209   if value % 4:
 210     value += 4 - value % 4
 211
 212   return value
 213
 214
 215 def ShellQuote(value):
 216   """Quotes shell argument according to POSIX.
 217
 218   @type value: str
 219   @param value: the argument to be quoted
 220   @rtype: str
 221   @return: the quoted value
 222
 223   """
 224   if _SHELL_UNQUOTED_RE.match(value):
 225     return value
 226   else:
 227     return "'%s'" % value.replace("'", "'\\''")
 228
 229
 230 def ShellQuoteArgs(args):
 231   """Quotes a list of shell arguments.
 232
 233   @type args: list
 234   @param args: list of arguments to be quoted
 235   @rtype: str
 236   @return: the quoted arguments concatenated with spaces
 237
 238   """
 239   return " ".join([ShellQuote(i) for i in args])
 240
 241
 242 class ShellWriter:
 243   """Helper class to write scripts with indentation.
 244
 245   """
 246   INDENT_STR = "  "
 247
 248   def __init__(self, fh):
 249     """Initializes this class.
 250
 251     """
 252     self._fh = fh
 253     self._indent = 0
 254
 255   def IncIndent(self):
 256     """Increase indentation level by 1.
 257
 258     """
 259     self._indent += 1
 260
 261   def DecIndent(self):
 262     """Decrease indentation level by 1.
 263
 264     """
 265     assert self._indent > 0
 266     self._indent -= 1
 267
 268   def Write(self, txt, *args):
 269     """Write line to output file.
 270
 271     """
 272     assert self._indent >= 0
 273
 274     if args:
 275       line = txt % args
 276     else:
 277       line = txt
 278
 279     if line:
 280       # Indent only if there's something on the line
 281       self._fh.write(self._indent * self.INDENT_STR)
 282
 283     self._fh.write(line)
 284
 285     self._fh.write("\n")
 286
 287
 288 def GenerateSecret(numbytes=20):
 289   """Generates a random secret.
 290
 291   This will generate a pseudo-random secret returning an hex string
 292   (so that it can be used where an ASCII string is needed).
 293
 294   @param numbytes: the number of bytes which will be represented by the returned
 295       string (defaulting to 20, the length of a SHA1 hash)
 296   @rtype: str
 297   @return: an hex representation of the pseudo-random sequence
 298
 299   """
 300   return os.urandom(numbytes).encode("hex")
 301
 302
 303 def NormalizeAndValidateMac(mac):
 304   """Normalizes and check if a MAC address is valid.
 305
 306   Checks whether the supplied MAC address is formally correct, only
 307   accepts colon separated format. Normalize it to all lower.
 308
 309   @type mac: str
 310   @param mac: the MAC to be validated
 311   @rtype: str
 312   @return: returns the normalized and validated MAC.
 313
 314   @raise errors.OpPrereqError: If the MAC isn't valid
 315
 316   """
 317   if not _MAC_CHECK_RE.match(mac):
 318     raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
 319                                errors.ECODE_INVAL)
 320
 321   return mac.lower()
 322
 323
 324 def SafeEncode(text):
 325   """Return a 'safe' version of a source string.
 326
 327   This function mangles the input string and returns a version that
 328   should be safe to display/encode as ASCII. To this end, we first
 329   convert it to ASCII using the 'backslashreplace' encoding which
 330   should get rid of any non-ASCII chars, and then we process it
 331   through a loop copied from the string repr sources in the python; we
 332   don't use string_escape anymore since that escape single quotes and
 333   backslashes too, and that is too much; and that escaping is not
 334   stable, i.e. string_escape(string_escape(x)) != string_escape(x).
 335
 336   @type text: str or unicode
 337   @param text: input data
 338   @rtype: str
 339   @return: a safe version of text
 340
 341   """
 342   if isinstance(text, unicode):
 343     # only if unicode; if str already, we handle it below
 344     text = text.encode("ascii", "backslashreplace")
 345   resu = ""
 346   for char in text:
 347     c = ord(char)
 348     if char == "\t":
 349       resu += r"\t"
 350     elif char == "\n":
 351       resu += r"\n"
 352     elif char == "\r":
 353       resu += r'\'r'
 354     elif c < 32 or c >= 127: # non-printable
 355       resu += "\\x%02x" % (c & 0xff)
 356     else:
 357       resu += char
 358   return resu
 359
 360
 361 def UnescapeAndSplit(text, sep=","):
 362   """Split and unescape a string based on a given separator.
 363
 364   This function splits a string based on a separator where the
 365   separator itself can be escape in order to be an element of the
 366   elements. The escaping rules are (assuming coma being the
 367   separator):
 368     - a plain , separates the elements
 369     - a sequence \\\\, (double backslash plus comma) is handled as a
 370       backslash plus a separator comma
 371     - a sequence \, (backslash plus comma) is handled as a
 372       non-separator comma
 373
 374   @type text: string
 375   @param text: the string to split
 376   @type sep: string
 377   @param text: the separator
 378   @rtype: string
 379   @return: a list of strings
 380
 381   """
 382   # we split the list by sep (with no escaping at this stage)
 383   slist = text.split(sep)
 384   # next, we revisit the elements and if any of them ended with an odd
 385   # number of backslashes, then we join it with the next
 386   rlist = []
 387   while slist:
 388     e1 = slist.pop(0)
 389     if e1.endswith("\\"):
 390       num_b = len(e1) - len(e1.rstrip("\\"))
 391       if num_b % 2 == 1 and slist:
 392         e2 = slist.pop(0)
 393         # Merge the two elements and push the result back to the source list for
 394         # revisiting. If e2 ended with backslashes, further merging may need to
 395         # be done.
 396         slist.insert(0, e1 + sep + e2)
 397         continue
 398     # here the backslashes remain (all), and will be reduced in the next step
 399     rlist.append(e1)
 400   # finally, replace backslash-something with something
 401   rlist = [re.sub(r"\\(.)", r"\1", v) for v in rlist]
 402   return rlist
 403
 404
 405 def CommaJoin(names):
 406   """Nicely join a set of identifiers.
 407
 408   @param names: set, list or tuple
 409   @return: a string with the formatted results
 410
 411   """
 412   return ", ".join([str(val) for val in names])
 413
 414
 415 def FormatTime(val):
 416   """Formats a time value.
 417
 418   @type val: float or None
 419   @param val: Timestamp as returned by time.time() (seconds since Epoch,
 420     1970-01-01 00:00:00 UTC)
 421   @return: a string value or N/A if we don't have a valid timestamp
 422
 423   """
 424   if val is None or not isinstance(val, (int, float)):
 425     return "N/A"
 426   # these two codes works on Linux, but they are not guaranteed on all
 427   # platforms
 428   return time.strftime("%F %T", time.localtime(val))
 429
 430
 431 def FormatSeconds(secs):
 432   """Formats seconds for easier reading.
 433
 434   @type secs: number
 435   @param secs: Number of seconds
 436   @rtype: string
 437   @return: Formatted seconds (e.g. "2d 9h 19m 49s")
 438
 439   """
 440   parts = []
 441
 442   secs = round(secs, 0)
 443
 444   if secs > 0:
 445     # Negative values would be a bit tricky
 446     for unit, one in [("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)]:
 447       (complete, secs) = divmod(secs, one)
 448       if complete or parts:
 449         parts.append("%d%s" % (complete, unit))
 450
 451   parts.append("%ds" % secs)
 452
 453   return " ".join(parts)
 454
 455
 456 class LineSplitter:
 457   """Splits data chunks into lines separated by newline.
 458
 459   Instances provide a file-like interface.
 460
 461   """
 462   def __init__(self, line_fn, *args):
 463     """Initializes this class.
 464
 465     @type line_fn: callable
 466     @param line_fn: Function called for each line, first parameter is line
 467     @param args: Extra arguments for L{line_fn}
 468
 469     """
 470     assert callable(line_fn)
 471
 472     if args:
 473       # Python 2.4 doesn't have functools.partial yet
 474       self._line_fn = \
 475         lambda line: line_fn(line, *args) # pylint: disable=W0142
 476     else:
 477       self._line_fn = line_fn
 478
 479     self._lines = collections.deque()
 480     self._buffer = ""
 481
 482   def write(self, data):
 483     parts = (self._buffer + data).split("\n")
 484     self._buffer = parts.pop()
 485     self._lines.extend(parts)
 486
 487   def flush(self):
 488     while self._lines:
 489       self._line_fn(self._lines.popleft().rstrip("\r\n"))
 490
 491   def close(self):
 492     self.flush()
 493     if self._buffer:
 494       self._line_fn(self._buffer)
 495
 496
 497 def IsValidShellParam(word):
 498   """Verifies is the given word is safe from the shell's p.o.v.
 499
 500   This means that we can pass this to a command via the shell and be
 501   sure that it doesn't alter the command line and is passed as such to
 502   the actual command.
 503
 504   Note that we are overly restrictive here, in order to be on the safe
 505   side.
 506
 507   @type word: str
 508   @param word: the word to check
 509   @rtype: boolean
 510   @return: True if the word is 'safe'
 511
 512   """
 513   return bool(_SHELLPARAM_REGEX.match(word))
 514
 515
 516 def BuildShellCmd(template, *args):
 517   """Build a safe shell command line from the given arguments.
 518
 519   This function will check all arguments in the args list so that they
 520   are valid shell parameters (i.e. they don't contain shell
 521   metacharacters). If everything is ok, it will return the result of
 522   template % args.
 523
 524   @type template: str
 525   @param template: the string holding the template for the
 526       string formatting
 527   @rtype: str
 528   @return: the expanded command line
 529
 530   """
 531   for word in args:
 532     if not IsValidShellParam(word):
 533       raise errors.ProgrammerError("Shell argument '%s' contains"
 534                                    " invalid characters" % word)
 535   return template % args
 536
 537
 538 def FormatOrdinal(value):
 539   """Formats a number as an ordinal in the English language.
 540
 541   E.g. the number 1 becomes "1st", 22 becomes "22nd".
 542
 543   @type value: integer
 544   @param value: Number
 545   @rtype: string
 546
 547   """
 548   tens = value % 10
 549
 550   if value > 10 and value < 20:
 551     suffix = "th"
 552   elif tens == 1:
 553     suffix = "st"
 554   elif tens == 2:
 555     suffix = "nd"
 556   elif tens == 3:
 557     suffix = "rd"
 558   else:
 559     suffix = "th"
 560
 561   return "%s%s" % (value, suffix)
 562
 563
 564 def Truncate(text, length):
 565   """Truncate string and add ellipsis if needed.
 566
 567   @type text: string
 568   @param text: Text
 569   @type length: integer
 570   @param length: Desired length
 571   @rtype: string
 572   @return: Truncated text
 573
 574   """
 575   assert length > len(_ASCII_ELLIPSIS)
 576
 577   # Serialize if necessary
 578   if not isinstance(text, basestring):
 579     text = str(text)
 580
 581   if len(text) <= length:
 582     return text
 583   else:
 584     return text[:length - len(_ASCII_ELLIPSIS)] + _ASCII_ELLIPSIS