4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Utility functions for manipulating or working with text.
31 from ganeti import errors
#: Unit checker regexp: a number made of digits and dots, optional
#: whitespace and an optional alphabetic unit
_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")

#: Characters which don't need to be quoted for shell commands
_SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$")

#: MAC address checker regexp: six colon-separated pairs of hex digits,
#: matched case-insensitively
_MAC_CHECK_RE = re.compile("^([0-9a-f]{2}:){5}[0-9a-f]{2}$", re.I)

#: Shell param checker regexp
_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$")

#: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026)
_ASCII_ELLIPSIS = "..."
def MatchNameComponent(key, name_list, case_sensitive=True):
    """Try to match a name against a list.

    This function will try to match a name like test1 against a list
    like C{['test1.example.com', 'test2.example.com', ...]}. Against
    this list, I{'test1'} as well as I{'test1.example'} will match, but
    not I{'test1.ex'}. A multiple match will be considered as no match
    at all (e.g. I{'test1'} against C{['test1.example.com',
    'test1.example.org']}), except when the key fully matches an entry
    (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).

    @type key: str
    @param key: the name to be searched
    @type name_list: list
    @param name_list: the list of strings against which to search the key
    @type case_sensitive: boolean
    @param case_sensitive: whether to provide a case-sensitive match

    @rtype: None or str
    @return: None if there is no match I{or} if there are multiple matches,
        otherwise the element from the list which matches

    """
    # A literal entry always wins, even if other entries share its prefix
    if key in name_list:
        return key

    re_flags = 0
    if not case_sensitive:
        re_flags |= re.IGNORECASE
        # Normalised once so the full-name comparison below is case-blind
        key = key.upper()

    # The key must be followed by the end of the name or by a dot, so only
    # whole name components can match
    name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags)

    names_filtered = []
    string_matches = []
    for name in name_list:
        if name_re.match(name) is not None:
            names_filtered.append(name)
            if not case_sensitive and key == name.upper():
                string_matches.append(name)

    # A unique full-name match (ignoring case) beats component matches
    if len(string_matches) == 1:
        return string_matches[0]
    if len(names_filtered) == 1:
        return names_filtered[0]

    return None
def _DnsNameGlobHelper(match):
    """Helper function for L{DnsNameGlobPattern}.

    Returns regular expression pattern for parts of the pattern.

    @param match: regular expression match object covering either a single
        wildcard character or a run of literal characters
    @rtype: string
    @return: regular expression fragment for the matched text

    """
    text = match.group(0)

    if text == "*":
        # Any run of characters within one DNS name part
        return "[^.]*"
    elif text == "?":
        # Exactly one character within one DNS name part
        return "[^.]"
    else:
        return re.escape(text)


def DnsNameGlobPattern(pattern):
    """Generates regular expression from DNS name globbing pattern.

    A DNS name globbing pattern (e.g. C{*.site}) is converted to a regular
    expression. Escape sequences or ranges (e.g. [a-z]) are not supported.

    Matching always starts at the leftmost part. An asterisk (*) matches all
    characters except the dot (.) separating DNS name parts. A question mark (?)
    matches a single character except the dot (.).

    @type pattern: string
    @param pattern: DNS name globbing pattern
    @rtype: string
    @return: Regular expression

    """
    # Wildcards are translated one by one, everything in between is escaped
    translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)
    return r"^%s(\..*)?$" % translated
def FormatUnit(value, units):
    """Formats an incoming number of MiB with the appropriate unit.

    @type value: int
    @param value: integer representing the value in MiB (1048576)
    @type units: char
    @param units: the type of formatting we should do:
        - 'h' for automatic scaling
        - 'm' for MiBs
        - 'g' for GiBs
        - 't' for TiBs
    @rtype: str
    @return: the formatted value (with suffix)
    @raise errors.ProgrammerError: if C{units} is not one of the above

    """
    if units not in ("m", "g", "t", "h"):
        raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

    # Only automatic scaling appends a suffix; with an explicit unit the
    # caller already knows the scale
    autoscale = (units == "h")

    if units == "m" or (autoscale and value < 1024):
        if autoscale:
            suffix = "M"
        else:
            suffix = ""
        return "%d%s" % (round(value, 0), suffix)

    if units == "g" or (autoscale and value < (1024 * 1024)):
        if autoscale:
            suffix = "G"
        else:
            suffix = ""
        return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)

    if autoscale:
        suffix = "T"
    else:
        suffix = ""
    return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
def ParseUnit(input_string):
    """Tries to extract number and scale from the given string.

    Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
    [UNIT]}. If no unit is specified, it defaults to MiB. Return value
    is always an int in MiB.

    @param input_string: value to parse; it is converted with C{str()} first
    @rtype: int
    @return: size in MiB, rounded up to the next multiple of 4
    @raise errors.UnitParseError: if the format is invalid or the unit unknown

    """
    m = _PARSEUNIT_REGEX.match(str(input_string))
    if not m:
        raise errors.UnitParseError("Invalid format")

    (number, unit) = m.groups()

    try:
        value = float(number)
    except ValueError:
        # The regexp accepts any mix of digits and dots (e.g. "1.2.3" or "."),
        # which is still not a number
        raise errors.UnitParseError("Invalid format")

    if unit:
        lcunit = unit.lower()
    else:
        lcunit = "m"

    if lcunit in ("m", "mb", "mib"):
        # Value already in MiB
        pass

    elif lcunit in ("g", "gb", "gib"):
        value *= 1024

    elif lcunit in ("t", "tb", "tib"):
        value *= 1024 * 1024

    else:
        raise errors.UnitParseError("Unknown unit: %s" % unit)

    # Make sure we round up
    if int(value) < value:
        value += 1

    # Round up to the next multiple of 4
    value = int(value)
    if value % 4:
        value += 4 - value % 4

    return value
def ShellQuote(value):
    """Quotes shell argument according to POSIX.

    @type value: str
    @param value: the argument to be quoted
    @rtype: str
    @return: the quoted value

    """
    m = _SHELL_UNQUOTED_RE.match(value)
    # "$" also matches just before a trailing newline, hence additionally
    # insist that the match covers the whole argument before passing it on
    # unquoted
    if m and m.end() == len(value):
        return value

    # Single quotes protect everything except a single quote itself, which is
    # emitted as: close quote, escaped quote, reopen quote
    return "'%s'" % value.replace("'", "'\\''")
def ShellQuoteArgs(args):
    """Quotes a list of shell arguments.

    @type args: list
    @param args: list of arguments to be quoted
    @rtype: str
    @return: the quoted arguments concatenated with spaces

    """
    quoted = [ShellQuote(arg) for arg in args]
    return " ".join(quoted)
243 """Helper class to write scripts with indentation.
def __init__(self, fh, indent=True):
    """Initializes this class.

    @param fh: file-like object the lines are written to
    @type indent: boolean
    @param indent: whether written lines are prefixed with the current
        indentation

    """
    self._fh = fh
    self._indent_enabled = indent
    # Current indentation level, in units of INDENT_STR
    self._indent = 0
257 """Increase indentation level by 1.
263 """Decrease indentation level by 1.
266 assert self._indent > 0
def Write(self, txt, *args):
    """Write line to output file.

    @type txt: str
    @param txt: text of the line, or a format string if C{args} are given
    @param args: optional values interpolated into C{txt} using C{%}

    """
    assert self._indent >= 0

    # Only interpolate when there are arguments, so that plain text may
    # contain literal percent signs
    if args:
        line = txt % args
    else:
        line = txt

    if line and self._indent_enabled:
        # Indent only if there's something on the line
        self._fh.write(self._indent * self.INDENT_STR)

    self._fh.write(line)
    self._fh.write("\n")
def GenerateSecret(numbytes=20):
    """Generates a random secret.

    This will generate a pseudo-random secret returning a hex string
    (so that it can be used where an ASCII string is needed).

    @type numbytes: int
    @param numbytes: the number of bytes which will be represented by the returned
        string (defaulting to 20, the length of a SHA1 hash)
    @rtype: str
    @return: a hex representation of the pseudo-random sequence

    """
    import binascii

    # The "hex" codec only exists on Python 2 byte strings; hexlify works
    # everywhere
    secret = binascii.hexlify(os.urandom(numbytes))
    if not isinstance(secret, str):
        # hexlify returned bytes (Python 3); callers expect a native string
        secret = secret.decode("ascii")
    return secret
def NormalizeAndValidateMac(mac):
    """Normalizes and checks if a MAC address is valid.

    Checks whether the supplied MAC address is formally correct, only
    accepts colon separated format. Normalize it to all lower.

    @type mac: str
    @param mac: the MAC to be validated
    @rtype: str
    @return: returns the normalized and validated MAC.

    @raise errors.OpPrereqError: If the MAC isn't valid

    """
    m = _MAC_CHECK_RE.match(mac)
    # "$" also matches just before a trailing newline, so make sure the whole
    # string was consumed
    if not m or m.end() != len(mac):
        raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
                                   errors.ECODE_INVAL)

    return mac.lower()
def SafeEncode(text):
    """Return a 'safe' version of a source string.

    This function mangles the input string and returns a version that
    should be safe to display/encode as ASCII. To this end, we first
    convert it to ASCII using the 'backslashreplace' encoding which
    should get rid of any non-ASCII chars, and then we process it
    through a loop copied from the string repr sources in the python; we
    don't use string_escape anymore since that escapes single quotes and
    backslashes too, and that is too much; and that escaping is not
    stable, i.e. string_escape(string_escape(x)) != string_escape(x).

    @type text: str or unicode
    @param text: input data
    @rtype: str
    @return: a safe version of text

    """
    try:
        text_type = unicode
    except NameError:
        # No separate unicode type: the native string is the text type
        text_type = str

    if isinstance(text, text_type):
        # only if unicode; byte strings are handled by the loop below
        text = text.encode("ascii", "backslashreplace")

    pieces = []
    for char in text:
        if isinstance(char, int):
            # Iterating over a bytes object yields integers
            code = char
            char = chr(code)
        else:
            code = ord(char)

        if char == "\t":
            pieces.append(r"\t")
        elif char == "\n":
            pieces.append(r"\n")
        elif char == "\r":
            pieces.append(r"\r")
        elif code < 32 or code >= 127: # non-printable
            pieces.append("\\x%02x" % (code & 0xff))
        else:
            pieces.append(char)

    # Joined once instead of growing a string character by character
    return "".join(pieces)
def UnescapeAndSplit(text, sep=","):
    """Split and unescape a string based on a given separator.

    This function splits a string based on a separator where the
    separator itself can be escaped in order to be an element of the
    elements. The escaping rules are (assuming comma being the
    separator):
      - a plain , separates the elements
      - a sequence \\\\, (double backslash plus comma) is handled as a
        backslash plus a separator comma
      - a sequence \, (backslash plus comma) is handled as a
        non-separator comma

    @type text: string
    @param text: the string to split
    @type sep: string
    @param sep: the separator
    @rtype: list
    @return: a list of strings

    """
    # we split the list by sep (with no escaping at this stage) and then
    # glue back together the pieces whose separator was escaped, i.e. those
    # preceded by an odd number of backslashes
    elements = []
    pending = None
    for piece in text.split(sep):
        if pending is None:
            pending = piece
        else:
            pending = pending + sep + piece

        num_b = len(pending) - len(pending.rstrip("\\"))
        if num_b % 2 == 1:
            # Escaped separator: keep merging with whatever comes next
            continue

        # here the backslashes remain (all), and will be reduced in the next
        # step
        elements.append(pending)
        pending = None

    if pending is not None:
        # The text ended in an odd number of backslashes; nothing to merge with
        elements.append(pending)

    # finally, replace backslash-something with something
    return [re.sub(r"\\(.)", r"\1", v) for v in elements]
def CommaJoin(names):
    """Nicely join a set of identifiers.

    @param names: set, list or tuple
    @rtype: str
    @return: a string with the formatted results

    """
    # Elements are stringified first so non-string identifiers can be joined
    return ", ".join(map(str, names))
def FormatTime(val, usecs=None):
    """Formats a time value.

    @type val: float or None
    @param val: Timestamp as returned by time.time() (seconds since Epoch,
        1970-01-01 00:00:00 UTC)
    @type usecs: int or None
    @param usecs: optional microseconds, appended as a six-digit fraction
    @rtype: str
    @return: a string value or N/A if we don't have a valid timestamp

    """
    # None is not a number either, so a single type check covers both cases
    if not isinstance(val, (int, float)):
        return "N/A"

    # "%F %T" is a shorthand that is not guaranteed on all platforms; the
    # spelled-out directives below yield the same text everywhere
    result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

    if usecs is not None:
        result += ".%06d" % usecs

    return result
def FormatSeconds(secs):
    """Formats seconds for easier reading.

    @type secs: number
    @param secs: Number of seconds
    @rtype: string
    @return: Formatted seconds (e.g. "2d 9h 19m 49s")

    """
    parts = []

    secs = round(secs, 0)

    if secs > 0:
        # Negative values would be a bit tricky
        for unit, one in [("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)]:
            (complete, secs) = divmod(secs, one)
            # Leading zero units are dropped, but once a larger unit has been
            # printed all smaller ones follow, even if zero
            if complete or parts:
                parts.append("%d%s" % (complete, unit))

    parts.append("%ds" % secs)

    return " ".join(parts)
464 """Splits data chunks into lines separated by newline.
466 Instances provide a file-like interface.
def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
        # Python 2.4 doesn't have functools.partial yet
        self._line_fn = \
            lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
        self._line_fn = line_fn

    # Complete lines waiting to be handed to the callback
    self._lines = collections.deque()
    # Trailing data which has not been terminated by a newline yet
    self._buffer = ""
def write(self, data):
    """Buffers a chunk of data, queueing every line completed by it.

    @type data: str
    @param data: chunk of text; may contain any number of newlines

    """
    parts = (self._buffer + data).split("\n")
    # Whatever follows the last newline is incomplete and stays buffered
    self._buffer = parts.pop()
    self._lines.extend(parts)
496 self._line_fn(self._lines.popleft().rstrip("\r\n"))
501 self._line_fn(self._buffer)
def IsValidShellParam(word):
    """Verifies if the given word is safe from the shell's p.o.v.

    This means that we can pass this to a command via the shell and be
    sure that it doesn't alter the command line and is passed as such to
    the actual command.

    Note that we are overly restrictive here, in order to be on the safe
    side.

    @type word: str
    @param word: the word to check
    @rtype: boolean
    @return: True if the word is 'safe'

    """
    m = _SHELLPARAM_REGEX.match(word)
    # "$" also matches just before a trailing newline, which is anything but
    # safe in a shell command; require the match to span the whole word
    return bool(m) and m.end() == len(word)
def BuildShellCmd(template, *args):
    """Build a safe shell command line from the given arguments.

    This function will check all arguments in the args list so that they
    are valid shell parameters (i.e. they don't contain shell
    metacharacters). If everything is ok, it will return the result of
    template % args.

    @type template: str
    @param template: the string holding the template for the
        string formatting
    @param args: values checked and then interpolated into the template
    @rtype: str
    @return: the expanded command line
    @raise errors.ProgrammerError: if an argument contains invalid characters

    """
    for word in args:
        if not IsValidShellParam(word):
            raise errors.ProgrammerError("Shell argument '%s' contains"
                                         " invalid characters" % word)
    return template % args
def FormatOrdinal(value):
    """Formats a number as an ordinal in the English language.

    E.g. the number 1 becomes "1st", 22 becomes "22nd".

    @type value: integer
    @param value: Number
    @rtype: string
    @return: the number followed by its ordinal suffix

    """
    tens = value % 10

    # The "teens" always take "th"; looking at the last two digits makes this
    # hold for 111, 212, 1013, ... as well, not just for 11 to 19
    if 10 < value % 100 < 20:
        suffix = "th"
    elif tens == 1:
        suffix = "st"
    elif tens == 2:
        suffix = "nd"
    elif tens == 3:
        suffix = "rd"
    else:
        suffix = "th"

    return "%s%s" % (value, suffix)
def Truncate(text, length):
    """Truncate string and add ellipsis if needed.

    @param text: Text; anything which is not a string is converted with
        C{str()} first
    @type length: integer
    @param length: Desired length
    @rtype: string
    @return: Truncated text

    """
    # There must be room for at least one character before the ellipsis
    assert length > len(_ASCII_ELLIPSIS)

    # Serialize if necessary
    if not isinstance(text, basestring):
        text = str(text)

    if len(text) <= length:
        return text

    # The ellipsis counts towards the desired length
    return text[:length - len(_ASCII_ELLIPSIS)] + _ASCII_ELLIPSIS
def FilterEmptyLinesAndComments(text):
    """Filters empty lines and comments from a line-based string.

    Whitespace is also removed from the beginning and end of all lines.

    @type text: string
    @param text: Input string
    @rtype: list
    @return: the stripped lines which are neither empty nor comments

    """
    stripped = [line.strip() for line in text.splitlines()]
    # Ignore empty lines and comments
    return [line for line in stripped if line and not line.startswith("#")]