code.grnet.gr Git - ganeti-local/blob - lib/utils/text.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21 """Utility functions for manipulating or working with text.
  22
  23 """
  24
  25
  26 import re
  27 import os
  28 import time
  29 import collections
  30
  31 from ganeti import errors
  32
  33
  34 #: Unit checker regexp
  35 _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")
  36
  37 #: Characters which don't need to be quoted for shell commands
  38 _SHELL_UNQUOTED_RE = re.compile('^[-.,=:/_+@A-Za-z0-9]+$')
  39
  40 #: MAC checker regexp
  41 _MAC_CHECK_RE = re.compile("^([0-9a-f]{2}:){5}[0-9a-f]{2}$", re.I)
  42
  43 #: Shell param checker regexp
  44 _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$")
  45
  46
  47 def MatchNameComponent(key, name_list, case_sensitive=True):
  48   """Try to match a name against a list.
  49
  50   This function will try to match a name like test1 against a list
  51   like C{['test1.example.com', 'test2.example.com', ...]}. Against
  52   this list, I{'test1'} as well as I{'test1.example'} will match, but
  53   not I{'test1.ex'}. A multiple match will be considered as no match
  54   at all (e.g. I{'test1'} against C{['test1.example.com',
  55   'test1.example.org']}), except when the key fully matches an entry
  56   (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).
  57
  58   @type key: str
  59   @param key: the name to be searched
  60   @type name_list: list
  61   @param name_list: the list of strings against which to search the key
  62   @type case_sensitive: boolean
  63   @param case_sensitive: whether to provide a case-sensitive match
  64
  65   @rtype: None or str
  66   @return: None if there is no match I{or} if there are multiple matches,
  67       otherwise the element from the list which matches
  68
  69   """
  70   if key in name_list:
  71     return key
  72
  73   re_flags = 0
  74   if not case_sensitive:
  75     re_flags |= re.IGNORECASE
  76     key = key.upper()
  77   mo = re.compile("^%s(\..*)?$" % re.escape(key), re_flags)
  78   names_filtered = []
  79   string_matches = []
  80   for name in name_list:
  81     if mo.match(name) is not None:
  82       names_filtered.append(name)
  83       if not case_sensitive and key == name.upper():
  84         string_matches.append(name)
  85
  86   if len(string_matches) == 1:
  87     return string_matches[0]
  88   if len(names_filtered) == 1:
  89     return names_filtered[0]
  90   return None
  91
  92
  93 def FormatUnit(value, units):
  94   """Formats an incoming number of MiB with the appropriate unit.
  95
  96   @type value: int
  97   @param value: integer representing the value in MiB (1048576)
  98   @type units: char
  99   @param units: the type of formatting we should do:
 100       - 'h' for automatic scaling
 101       - 'm' for MiBs
 102       - 'g' for GiBs
 103       - 't' for TiBs
 104   @rtype: str
 105   @return: the formatted value (with suffix)
 106
 107   """
 108   if units not in ('m', 'g', 't', 'h'):
 109     raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))
 110
 111   suffix = ''
 112
 113   if units == 'm' or (units == 'h' and value < 1024):
 114     if units == 'h':
 115       suffix = 'M'
 116     return "%d%s" % (round(value, 0), suffix)
 117
 118   elif units == 'g' or (units == 'h' and value < (1024 * 1024)):
 119     if units == 'h':
 120       suffix = 'G'
 121     return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)
 122
 123   else:
 124     if units == 'h':
 125       suffix = 'T'
 126     return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
 127
 128
 129 def ParseUnit(input_string):
 130   """Tries to extract number and scale from the given string.
 131
 132   Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
 133   [UNIT]}. If no unit is specified, it defaults to MiB. Return value
 134   is always an int in MiB.
 135
 136   """
 137   m = _PARSEUNIT_REGEX.match(str(input_string))
 138   if not m:
 139     raise errors.UnitParseError("Invalid format")
 140
 141   value = float(m.groups()[0])
 142
 143   unit = m.groups()[1]
 144   if unit:
 145     lcunit = unit.lower()
 146   else:
 147     lcunit = 'm'
 148
 149   if lcunit in ('m', 'mb', 'mib'):
 150     # Value already in MiB
 151     pass
 152
 153   elif lcunit in ('g', 'gb', 'gib'):
 154     value *= 1024
 155
 156   elif lcunit in ('t', 'tb', 'tib'):
 157     value *= 1024 * 1024
 158
 159   else:
 160     raise errors.UnitParseError("Unknown unit: %s" % unit)
 161
 162   # Make sure we round up
 163   if int(value) < value:
 164     value += 1
 165
 166   # Round up to the next multiple of 4
 167   value = int(value)
 168   if value % 4:
 169     value += 4 - value % 4
 170
 171   return value
 172
 173
 174 def ShellQuote(value):
 175   """Quotes shell argument according to POSIX.
 176
 177   @type value: str
 178   @param value: the argument to be quoted
 179   @rtype: str
 180   @return: the quoted value
 181
 182   """
 183   if _SHELL_UNQUOTED_RE.match(value):
 184     return value
 185   else:
 186     return "'%s'" % value.replace("'", "'\\''")
 187
 188
 189 def ShellQuoteArgs(args):
 190   """Quotes a list of shell arguments.
 191
 192   @type args: list
 193   @param args: list of arguments to be quoted
 194   @rtype: str
 195   @return: the quoted arguments concatenated with spaces
 196
 197   """
 198   return " ".join([ShellQuote(i) for i in args])
 199
 200
 201 class ShellWriter:
 202   """Helper class to write scripts with indentation.
 203
 204   """
 205   INDENT_STR = "  "
 206
 207   def __init__(self, fh):
 208     """Initializes this class.
 209
 210     """
 211     self._fh = fh
 212     self._indent = 0
 213
 214   def IncIndent(self):
 215     """Increase indentation level by 1.
 216
 217     """
 218     self._indent += 1
 219
 220   def DecIndent(self):
 221     """Decrease indentation level by 1.
 222
 223     """
 224     assert self._indent > 0
 225     self._indent -= 1
 226
 227   def Write(self, txt, *args):
 228     """Write line to output file.
 229
 230     """
 231     assert self._indent >= 0
 232
 233     self._fh.write(self._indent * self.INDENT_STR)
 234
 235     if args:
 236       self._fh.write(txt % args)
 237     else:
 238       self._fh.write(txt)
 239
 240     self._fh.write("\n")
 241
 242
 243 def GenerateSecret(numbytes=20):
 244   """Generates a random secret.
 245
 246   This will generate a pseudo-random secret returning an hex string
 247   (so that it can be used where an ASCII string is needed).
 248
 249   @param numbytes: the number of bytes which will be represented by the returned
 250       string (defaulting to 20, the length of a SHA1 hash)
 251   @rtype: str
 252   @return: an hex representation of the pseudo-random sequence
 253
 254   """
 255   return os.urandom(numbytes).encode("hex")
 256
 257
 258 def NormalizeAndValidateMac(mac):
 259   """Normalizes and check if a MAC address is valid.
 260
 261   Checks whether the supplied MAC address is formally correct, only
 262   accepts colon separated format. Normalize it to all lower.
 263
 264   @type mac: str
 265   @param mac: the MAC to be validated
 266   @rtype: str
 267   @return: returns the normalized and validated MAC.
 268
 269   @raise errors.OpPrereqError: If the MAC isn't valid
 270
 271   """
 272   if not _MAC_CHECK_RE.match(mac):
 273     raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
 274                                errors.ECODE_INVAL)
 275
 276   return mac.lower()
 277
 278
 279 def SafeEncode(text):
 280   """Return a 'safe' version of a source string.
 281
 282   This function mangles the input string and returns a version that
 283   should be safe to display/encode as ASCII. To this end, we first
 284   convert it to ASCII using the 'backslashreplace' encoding which
 285   should get rid of any non-ASCII chars, and then we process it
 286   through a loop copied from the string repr sources in the python; we
 287   don't use string_escape anymore since that escape single quotes and
 288   backslashes too, and that is too much; and that escaping is not
 289   stable, i.e. string_escape(string_escape(x)) != string_escape(x).
 290
 291   @type text: str or unicode
 292   @param text: input data
 293   @rtype: str
 294   @return: a safe version of text
 295
 296   """
 297   if isinstance(text, unicode):
 298     # only if unicode; if str already, we handle it below
 299     text = text.encode('ascii', 'backslashreplace')
 300   resu = ""
 301   for char in text:
 302     c = ord(char)
 303     if char  == '\t':
 304       resu += r'\t'
 305     elif char == '\n':
 306       resu += r'\n'
 307     elif char == '\r':
 308       resu += r'\'r'
 309     elif c < 32 or c >= 127: # non-printable
 310       resu += "\\x%02x" % (c & 0xff)
 311     else:
 312       resu += char
 313   return resu
 314
 315
 316 def UnescapeAndSplit(text, sep=","):
 317   """Split and unescape a string based on a given separator.
 318
 319   This function splits a string based on a separator where the
 320   separator itself can be escape in order to be an element of the
 321   elements. The escaping rules are (assuming coma being the
 322   separator):
 323     - a plain , separates the elements
 324     - a sequence \\\\, (double backslash plus comma) is handled as a
 325       backslash plus a separator comma
 326     - a sequence \, (backslash plus comma) is handled as a
 327       non-separator comma
 328
 329   @type text: string
 330   @param text: the string to split
 331   @type sep: string
 332   @param text: the separator
 333   @rtype: string
 334   @return: a list of strings
 335
 336   """
 337   # we split the list by sep (with no escaping at this stage)
 338   slist = text.split(sep)
 339   # next, we revisit the elements and if any of them ended with an odd
 340   # number of backslashes, then we join it with the next
 341   rlist = []
 342   while slist:
 343     e1 = slist.pop(0)
 344     if e1.endswith("\\"):
 345       num_b = len(e1) - len(e1.rstrip("\\"))
 346       if num_b % 2 == 1:
 347         e2 = slist.pop(0)
 348         # here the backslashes remain (all), and will be reduced in
 349         # the next step
 350         rlist.append(e1 + sep + e2)
 351         continue
 352     rlist.append(e1)
 353   # finally, replace backslash-something with something
 354   rlist = [re.sub(r"\\(.)", r"\1", v) for v in rlist]
 355   return rlist
 356
 357
 358 def CommaJoin(names):
 359   """Nicely join a set of identifiers.
 360
 361   @param names: set, list or tuple
 362   @return: a string with the formatted results
 363
 364   """
 365   return ", ".join([str(val) for val in names])
 366
 367
 368 def FormatTime(val):
 369   """Formats a time value.
 370
 371   @type val: float or None
 372   @param val: Timestamp as returned by time.time() (seconds since Epoch,
 373     1970-01-01 00:00:00 UTC)
 374   @return: a string value or N/A if we don't have a valid timestamp
 375
 376   """
 377   if val is None or not isinstance(val, (int, float)):
 378     return "N/A"
 379   # these two codes works on Linux, but they are not guaranteed on all
 380   # platforms
 381   return time.strftime("%F %T", time.localtime(val))
 382
 383
 384 def FormatSeconds(secs):
 385   """Formats seconds for easier reading.
 386
 387   @type secs: number
 388   @param secs: Number of seconds
 389   @rtype: string
 390   @return: Formatted seconds (e.g. "2d 9h 19m 49s")
 391
 392   """
 393   parts = []
 394
 395   secs = round(secs, 0)
 396
 397   if secs > 0:
 398     # Negative values would be a bit tricky
 399     for unit, one in [("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)]:
 400       (complete, secs) = divmod(secs, one)
 401       if complete or parts:
 402         parts.append("%d%s" % (complete, unit))
 403
 404   parts.append("%ds" % secs)
 405
 406   return " ".join(parts)
 407
 408
 409 class LineSplitter:
 410   """Splits data chunks into lines separated by newline.
 411
 412   Instances provide a file-like interface.
 413
 414   """
 415   def __init__(self, line_fn, *args):
 416     """Initializes this class.
 417
 418     @type line_fn: callable
 419     @param line_fn: Function called for each line, first parameter is line
 420     @param args: Extra arguments for L{line_fn}
 421
 422     """
 423     assert callable(line_fn)
 424
 425     if args:
 426       # Python 2.4 doesn't have functools.partial yet
 427       self._line_fn = \
 428         lambda line: line_fn(line, *args) # pylint: disable-msg=W0142
 429     else:
 430       self._line_fn = line_fn
 431
 432     self._lines = collections.deque()
 433     self._buffer = ""
 434
 435   def write(self, data):
 436     parts = (self._buffer + data).split("\n")
 437     self._buffer = parts.pop()
 438     self._lines.extend(parts)
 439
 440   def flush(self):
 441     while self._lines:
 442       self._line_fn(self._lines.popleft().rstrip("\r\n"))
 443
 444   def close(self):
 445     self.flush()
 446     if self._buffer:
 447       self._line_fn(self._buffer)
 448
 449
 450 def IsValidShellParam(word):
 451   """Verifies is the given word is safe from the shell's p.o.v.
 452
 453   This means that we can pass this to a command via the shell and be
 454   sure that it doesn't alter the command line and is passed as such to
 455   the actual command.
 456
 457   Note that we are overly restrictive here, in order to be on the safe
 458   side.
 459
 460   @type word: str
 461   @param word: the word to check
 462   @rtype: boolean
 463   @return: True if the word is 'safe'
 464
 465   """
 466   return bool(_SHELLPARAM_REGEX.match(word))
 467
 468
 469 def BuildShellCmd(template, *args):
 470   """Build a safe shell command line from the given arguments.
 471
 472   This function will check all arguments in the args list so that they
 473   are valid shell parameters (i.e. they don't contain shell
 474   metacharacters). If everything is ok, it will return the result of
 475   template % args.
 476
 477   @type template: str
 478   @param template: the string holding the template for the
 479       string formatting
 480   @rtype: str
 481   @return: the expanded command line
 482
 483   """
 484   for word in args:
 485     if not IsValidShellParam(word):
 486       raise errors.ProgrammerError("Shell argument '%s' contains"
 487                                    " invalid characters" % word)
 488   return template % args
 489
 490
 491 def FormatOrdinal(value):
 492   """Formats a number as an ordinal in the English language.
 493
 494   E.g. the number 1 becomes "1st", 22 becomes "22nd".
 495
 496   @type value: integer
 497   @param value: Number
 498   @rtype: string
 499
 500   """
 501   tens = value % 10
 502
 503   if value > 10 and value < 20:
 504     suffix = "th"
 505   elif tens == 1:
 506     suffix = "st"
 507   elif tens == 2:
 508     suffix = "nd"
 509   elif tens == 3:
 510     suffix = "rd"
 511   else:
 512     suffix = "th"
 513
 514   return "%s%s" % (value, suffix)