4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Utility functions for manipulating or working with text.
31 from ganeti import errors
#: Unit checker regexp (number, optional whitespace, optional alphabetic unit)
_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")

# NOTE: the validation patterns below are anchored with "\Z" rather than "$";
# "$" also matches just before a trailing newline, which would let values
# such as "foo\n" pass validation.

#: Characters which don't need to be quoted for shell commands
_SHELL_UNQUOTED_RE = re.compile(r"^[-.,=:/_+@A-Za-z0-9]+\Z")

#: MAC address checker regexp (six colon-separated hex octets)
_MAC_CHECK_RE = re.compile(r"^([0-9a-f]{2}:){5}[0-9a-f]{2}\Z", re.I)

#: Shell param checker regexp
_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+\Z")

#: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026)
_ASCII_ELLIPSIS = "..."
def MatchNameComponent(key, name_list, case_sensitive=True):
    """Try to match a name against a list.

    This function will try to match a name like test1 against a list
    like C{['test1.example.com', 'test2.example.com', ...]}. Against
    this list, I{'test1'} as well as I{'test1.example'} will match, but
    not I{'test1.ex'}. A multiple match will be considered as no match
    at all (e.g. I{'test1'} against C{['test1.example.com',
    'test1.example.org']}), except when the key fully matches an entry
    (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).

    @type key: str
    @param key: the name to be searched
    @type name_list: list
    @param name_list: the list of strings against which to search the key
    @type case_sensitive: boolean
    @param case_sensitive: whether to provide a case-sensitive match

    @rtype: None or str
    @return: None if there is no match I{or} if there are multiple matches,
        otherwise the element from the list which matches

    """
    # A literal entry equal to the key is unambiguous, no matter how many
    # longer names share it as a prefix.
    if key in name_list:
        return key

    re_flags = 0
    needle = key
    if not case_sensitive:
        re_flags |= re.IGNORECASE
        # Upper-cased copy used for the "full match" comparison below
        needle = key.upper()

    # The key must be followed either by the end of the name or by a dot,
    # i.e. it has to cover whole name components.
    name_re = re.compile(r"^%s(\..*)?$" % re.escape(needle), re_flags)

    names_filtered = []
    string_matches = []
    for name in name_list:
        if name_re.match(name) is None:
            continue
        names_filtered.append(name)
        # In case-insensitive mode a name equal to the key (ignoring case)
        # takes precedence over mere prefix matches.
        if not case_sensitive and needle == name.upper():
            string_matches.append(name)

    if len(string_matches) == 1:
        return string_matches[0]
    if len(names_filtered) == 1:
        return names_filtered[0]

    # Either nothing matched or the match is ambiguous
    return None
def _DnsNameGlobHelper(match):
    """Helper function for L{DnsNameGlobPattern}.

    Returns regular expression pattern for parts of the pattern.

    @param match: regular expression match object covering either a single
        wildcard character or a run of literal characters
    @rtype: string
    @return: Regular expression fragment for the matched text

    """
    text = match.group(0)

    if text == "*":
        # Any number of characters within one DNS name part
        return "[^.]*"
    if text == "?":
        # Exactly one character within one DNS name part
        return "[^.]"

    # Literal text must not be interpreted as a regular expression
    return re.escape(text)


def DnsNameGlobPattern(pattern):
    """Generates regular expression from DNS name globbing pattern.

    A DNS name globbing pattern (e.g. C{*.site}) is converted to a regular
    expression. Escape sequences or ranges (e.g. [a-z]) are not supported.

    Matching always starts at the leftmost part. An asterisk (*) matches all
    characters except the dot (.) separating DNS name parts. A question mark (?)
    matches a single character except the dot (.).

    @type pattern: string
    @param pattern: DNS name globbing pattern
    @rtype: string
    @return: Regular expression

    """
    # Tokenize into single wildcards and non-empty literal runs; each token is
    # translated by the helper. Trailing name parts are always optional.
    translated = re.sub(r"\*|\?|[^*?]+", _DnsNameGlobHelper, pattern)
    return r"^%s(\..*)?$" % translated
134 def FormatUnit(value, units):
135 """Formats an incoming number of MiB with the appropriate unit.
138 @param value: integer representing the value in MiB (1048576)
140 @param units: the type of formatting we should do:
141 - 'h' for automatic scaling
146 @return: the formatted value (with suffix)
149 if units not in ("m", "g", "t", "h"):
150 raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))
154 if units == "m" or (units == "h" and value < 1024):
157 return "%d%s" % (round(value, 0), suffix)
159 elif units == "g" or (units == "h" and value < (1024 * 1024)):
162 return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)
167 return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
def ParseUnit(input_string):
    """Tries to extract number and scale from the given string.

    Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
    [UNIT]}. If no unit is specified, it defaults to MiB. Return value
    is always an int in MiB.

    @param input_string: value to parse; converted with C{str()} first
    @rtype: int
    @return: the size in MiB, rounded up to the next multiple of 4
    @raise errors.UnitParseError: if the input is malformed or uses an
        unknown unit

    """
    m = _PARSEUNIT_REGEX.match(str(input_string))
    if not m:
        raise errors.UnitParseError("Invalid format")

    (number, unit) = m.groups()

    # The regular expression accepts any mix of digits and dots (e.g.
    # "1.2.3"), so the conversion itself can still fail.
    try:
        value = float(number)
    except ValueError:
        raise errors.UnitParseError("Invalid format")

    if unit:
        lcunit = unit.lower()
    else:
        lcunit = "m"

    if lcunit in ("m", "mb", "mib"):
        # Value already in MiB
        pass

    elif lcunit in ("g", "gb", "gib"):
        value *= 1024

    elif lcunit in ("t", "tb", "tib"):
        value *= 1024 * 1024

    else:
        raise errors.UnitParseError("Unknown unit: %s" % unit)

    # Make sure we round up
    if int(value) < value:
        value += 1

    # Round up to the next multiple of 4
    value = int(value)
    if value % 4:
        value += 4 - value % 4

    return value
def ShellQuote(value):
    """Quotes shell argument according to POSIX.

    @type value: str
    @param value: the argument to be quoted
    @rtype: str
    @return: the quoted value

    """
    harmless = _SHELL_UNQUOTED_RE.match(value)

    # Require the match to span the entire value: an end anchor such as "$"
    # also matches just before a trailing newline, and a newline must never
    # reach the shell unquoted.
    if harmless is not None and harmless.end() == len(value):
        return value

    # Wrap in single quotes; an embedded single quote closes the quoting,
    # emits an escaped quote and reopens it.
    return "'%s'" % value.replace("'", "'\\''")
def ShellQuoteArgs(args):
    """Quotes a list of shell arguments.

    @type args: list
    @param args: list of arguments to be quoted
    @rtype: str
    @return: the quoted arguments concatenated with spaces

    """
    return " ".join(ShellQuote(arg) for arg in args)
243 """Helper class to write scripts with indentation.
248 def __init__(self, fh):
249 """Initializes this class.
256 """Increase indentation level by 1.
262 """Decrease indentation level by 1.
265 assert self._indent > 0
268 def Write(self, txt, *args):
269 """Write line to output file.
272 assert self._indent >= 0
280 # Indent only if there's something on the line
281 self._fh.write(self._indent * self.INDENT_STR)
def GenerateSecret(numbytes=20):
    """Generates a random secret.

    This will generate a pseudo-random secret returning a hex string
    (so that it can be used where an ASCII string is needed).

    @type numbytes: int
    @param numbytes: the number of bytes which will be represented by the returned
        string (defaulting to 20, the length of a SHA1 hash)
    @rtype: str
    @return: a hex representation of the pseudo-random sequence

    """
    # Imported locally so the block does not depend on a module-level import
    import binascii

    # The "hex" codec only exists for Python 2 byte strings; hexlify works on
    # every interpreter version.
    hexed = binascii.hexlify(os.urandom(numbytes))

    # hexlify returns bytes on Python 3; callers expect a native string
    if not isinstance(hexed, str):
        hexed = hexed.decode("ascii")

    return hexed
303 def NormalizeAndValidateMac(mac):
304 """Normalizes and checks whether a MAC address is valid.
306 Checks whether the supplied MAC address is formally correct, only
307 accepts colon separated format. Normalize it to all lower.
310 @param mac: the MAC to be validated
312 @return: returns the normalized and validated MAC.
314 @raise errors.OpPrereqError: If the MAC isn't valid
317 if not _MAC_CHECK_RE.match(mac):
318 raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
324 def SafeEncode(text):
325 """Return a 'safe' version of a source string.
327 This function mangles the input string and returns a version that
328 should be safe to display/encode as ASCII. To this end, we first
329 convert it to ASCII using the 'backslashreplace' encoding which
330 should get rid of any non-ASCII chars, and then we process it
331 through a loop copied from the string repr sources in the python; we
332 don't use string_escape anymore since that escapes single quotes and
333 backslashes too, and that is too much; and that escaping is not
334 stable, i.e. string_escape(string_escape(x)) != string_escape(x).
336 @type text: str or unicode
337 @param text: input data
339 @return: a safe version of text
342 if isinstance(text, unicode):
343 # only if unicode; if str already, we handle it below
344 text = text.encode("ascii", "backslashreplace")
354 elif c < 32 or c >= 127: # non-printable
355 resu += "\\x%02x" % (c & 0xff)
def UnescapeAndSplit(text, sep=","):
    r"""Split and unescape a string based on a given separator.

    This function splits a string based on a separator where the
    separator itself can be escaped in order to be part of an
    element. The escaping rules are (assuming comma being the
    separator):
      - a plain , separates the elements
      - a sequence \\, (double backslash plus comma) is handled as a
        backslash plus a separator comma
      - a sequence \, (backslash plus comma) is handled as a
        non-separator comma

    @type text: string
    @param text: the string to split
    @type sep: string
    @param sep: the separator
    @rtype: list
    @return: a list of strings

    """
    # First split on every separator, ignoring escaping; a piece ending in an
    # odd number of backslashes had its separator escaped and is glued back
    # onto the piece that follows it.
    merged = []
    carry = None
    for piece in text.split(sep):
        if carry is not None:
            piece = carry + sep + piece
            carry = None

        trailing = len(piece) - len(piece.rstrip("\\"))
        if trailing % 2 == 1:
            # Separator was escaped; keep accumulating (the joined piece is
            # re-examined, as it may itself end with an escaping backslash)
            carry = piece
        else:
            merged.append(piece)

    # A dangling escape at the very end has nothing left to join with
    if carry is not None:
        merged.append(carry)

    # All backslashes are still in place here; finally, replace
    # backslash-something with something
    return [re.sub(r"\\(.)", r"\1", value) for value in merged]
def CommaJoin(names):
    """Nicely join a set of identifiers.

    @param names: set, list or tuple
    @rtype: str
    @return: a string with the formatted results

    """
    # Elements are stringified first so non-string identifiers can be joined
    return ", ".join(str(val) for val in names)
416 """Formats a time value.
418 @type val: float or None
419 @param val: Timestamp as returned by time.time() (seconds since Epoch,
420 1970-01-01 00:00:00 UTC)
421 @return: a string value or N/A if we don't have a valid timestamp
424 if val is None or not isinstance(val, (int, float)):
426 # these two format codes work on Linux, but they are not guaranteed on all
428 return time.strftime("%F %T", time.localtime(val))
def FormatSeconds(secs):
    """Formats seconds for easier reading.

    @type secs: number
    @param secs: Number of seconds
    @rtype: string
    @return: Formatted seconds (e.g. "2d 9h 19m 49s")

    """
    parts = []

    secs = round(secs, 0)

    if secs > 0:
        # Negative values would be a bit tricky
        for unit, one in [("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)]:
            (complete, secs) = divmod(secs, one)
            # Leading zero-valued units are skipped, but once a larger unit
            # has been emitted all smaller ones follow, even if zero
            if complete or parts:
                parts.append("%d%s" % (complete, unit))

    # Seconds are always shown
    parts.append("%ds" % secs)

    return " ".join(parts)
457 """Splits data chunks into lines separated by newline.
459 Instances provide a file-like interface.
462 def __init__(self, line_fn, *args):
463 """Initializes this class.
465 @type line_fn: callable
466 @param line_fn: Function called for each line, first parameter is line
467 @param args: Extra arguments for L{line_fn}
470 assert callable(line_fn)
473 # Python 2.4 doesn't have functools.partial yet
475 lambda line: line_fn(line, *args) # pylint: disable=W0142
477 self._line_fn = line_fn
479 self._lines = collections.deque()
482 def write(self, data):
483 parts = (self._buffer + data).split("\n")
484 self._buffer = parts.pop()
485 self._lines.extend(parts)
489 self._line_fn(self._lines.popleft().rstrip("\r\n"))
494 self._line_fn(self._buffer)
def IsValidShellParam(word):
    """Verifies if the given word is safe from the shell's p.o.v.

    This means that we can pass this to a command via the shell and be
    sure that it doesn't alter the command line and is passed as such to
    the actual command.

    Note that we are overly restrictive here, in order to be on the safe
    side.

    @type word: str
    @param word: the word to check
    @rtype: boolean
    @return: True if the word is 'safe'

    """
    found = _SHELLPARAM_REGEX.match(word)

    # The match has to cover the whole word: an end anchor such as "$" would
    # also accept a trailing newline, which does alter a command line.
    return found is not None and found.end() == len(word)
def BuildShellCmd(template, *args):
    """Build a safe shell command line from the given arguments.

    This function will check all arguments in the args list so that they
    are valid shell parameters (i.e. they don't contain shell
    metacharacters). If everything is ok, it will return the result of
    C{template % args}.

    @type template: str
    @param template: the string holding the template for the
        string formatting
    @param args: values to check and substitute into the template
    @rtype: str
    @return: the expanded command line
    @raise errors.ProgrammerError: if an argument contains characters
        rejected by L{IsValidShellParam}

    """
    # Reject the whole command if any single argument is unsafe
    for word in args:
        if not IsValidShellParam(word):
            raise errors.ProgrammerError("Shell argument '%s' contains"
                                         " invalid characters" % word)
    return template % args
def FormatOrdinal(value):
    """Formats a number as an ordinal in the English language.

    E.g. the number 1 becomes "1st", 22 becomes "22nd".

    @type value: integer
    @param value: Number
    @rtype: string
    @return: the number followed by its English ordinal suffix

    """
    # The "teens" always take "th"; checking the last two digits makes this
    # hold beyond 100 as well (111th, 212th, ...).
    if 10 < value % 100 < 20:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(value % 10, "th")

    return "%s%s" % (value, suffix)
def Truncate(text, length):
    """Truncate string and add ellipsis if needed.

    @type text: string
    @param text: Text; other values are serialized with C{str()} first
    @type length: integer
    @param length: Desired length
    @rtype: string
    @return: Truncated text

    """
    # There must be room for at least one character before the ellipsis
    assert length > len(_ASCII_ELLIPSIS)

    # "basestring" only exists on Python 2; fall back to "str" elsewhere
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Serialize if necessary
    if not isinstance(text, string_types):
        text = str(text)

    if len(text) <= length:
        return text

    # Reserve space for the ellipsis so the result is exactly "length" long
    return text[:length - len(_ASCII_ELLIPSIS)] + _ASCII_ELLIPSIS