4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Utility functions for manipulating or working with text.
31 from ganeti import errors
#: Unit checker regexp
_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")

#: Characters which don't need to be quoted for shell commands. The pattern
#: is anchored with \Z instead of "$": "$" also matches right before a
#: trailing newline, which would let a value such as "foo\n" pass as safe.
_SHELL_UNQUOTED_RE = re.compile(r"^[-.,=:/_+@A-Za-z0-9]+\Z")

#: Shell param checker regexp (\Z rather than "$" so that a trailing newline
#: is rejected)
_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+\Z")

#: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026)
_ASCII_ELLIPSIS = "..."

#: Pattern fragment matching a single lower-case hexadecimal MAC octet
_MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}"
def MatchNameComponent(key, name_list, case_sensitive=True):
    """Try to match a name against a list.

    This function will try to match a name like test1 against a list
    like C{['test1.example.com', 'test2.example.com', ...]}. Against
    this list, I{'test1'} as well as I{'test1.example'} will match, but
    not I{'test1.ex'}. A multiple match will be considered as no match
    at all (e.g. I{'test1'} against C{['test1.example.com',
    'test1.example.org']}), except when the key fully matches an entry
    (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).

    @type key: str
    @param key: the name to be searched
    @type name_list: list
    @param name_list: the list of strings against which to search the key
    @type case_sensitive: boolean
    @param case_sensitive: whether to provide a case-sensitive match

    @rtype: None or str
    @return: None if there is no match I{or} if there are multiple matches,
        otherwise the element from the list which matches

    """
    # An entry identical to the key always wins, even when other entries
    # share it as a prefix
    if key in name_list:
        return key

    re_flags = 0
    if not case_sensitive:
        re_flags |= re.IGNORECASE
        # Normalize once so the full-match comparison below is meaningful
        key = key.upper()

    # The key must be followed by the end of the name or by a dot, so that
    # only whole name components can match
    name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags)

    names_filtered = []
    string_matches = []
    for name in name_list:
        if name_re.match(name) is not None:
            names_filtered.append(name)
            if not case_sensitive and key == name.upper():
                string_matches.append(name)

    # A unique full match (ignoring case) beats any number of prefix matches
    if len(string_matches) == 1:
        return string_matches[0]
    if len(names_filtered) == 1:
        return names_filtered[0]

    return None
def _DnsNameGlobHelper(match):
    """Helper function for L{DnsNameGlobPattern}.

    Returns regular expression pattern for parts of the pattern.

    @param match: match object covering one piece of the globbing pattern,
        i.e. a single wildcard (C{*} or C{?}) or a run of literal characters
    @rtype: string
    @return: regular expression fragment equivalent to the matched piece

    """
    text = match.group(0)

    # Wildcards never cross the dot separating DNS name parts
    if text == "*":
        return "[^.]*"
    if text == "?":
        return "[^.]"

    # Everything else is literal text and must not be interpreted
    return re.escape(text)
def DnsNameGlobPattern(pattern):
    """Builds a regular expression out of a DNS name globbing pattern.

    The globbing pattern (e.g. C{*.site}) is translated piece by piece into
    a regular expression. Neither escape sequences nor ranges (e.g. [a-z])
    are supported.

    The resulting expression is anchored at the leftmost part of the name.
    An asterisk (*) stands for any run of characters other than the dot (.)
    separating DNS name parts; a question mark (?) stands for exactly one
    character other than the dot (.).

    @type pattern: string
    @param pattern: DNS name globbing pattern
    @rtype: string
    @return: Regular expression

    """
    # Each wildcard and each run of literal text is converted on its own
    translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

    # Further name components may follow the translated pattern
    return r"^%s(\..*)?$" % translated
def FormatUnit(value, units):
    """Formats an incoming number of MiB with the appropriate unit.

    @type value: int
    @param value: integer representing the value in MiB (1048576)
    @type units: char
    @param units: the type of formatting we should do:
        - 'h' for automatic scaling
        - 'm' for MiBs
        - 'g' for GiBs
        - 't' for TiBs
    @rtype: str
    @return: the formatted value (with suffix); the unit suffix is only
        appended when automatic scaling ('h') was requested, since otherwise
        the caller already knows the unit
    @raise errors.ProgrammerError: if C{units} is not one of the above

    """
    if units not in ("m", "g", "t", "h"):
        raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

    suffix = ""

    if units == "m" or (units == "h" and value < 1024):
        if units == "h":
            suffix = "M"
        return "%d%s" % (round(value, 0), suffix)

    elif units == "g" or (units == "h" and value < (1024 * 1024)):
        if units == "h":
            suffix = "G"
        return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)

    else:
        if units == "h":
            suffix = "T"
        return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
def ParseUnit(input_string):
    """Tries to extract number and scale from the given string.

    Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
    [UNIT]}. If no unit is specified, it defaults to MiB. Return value
    is always an int in MiB.

    @param input_string: value to parse; converted with C{str} first
    @rtype: int
    @return: size in MiB, rounded up to a multiple of 4
    @raise errors.UnitParseError: if the input is malformed or the unit is
        not known

    """
    m = _PARSEUNIT_REGEX.match(str(input_string))
    if not m:
        raise errors.UnitParseError("Invalid format")

    (number, unit) = m.groups()

    try:
        value = float(number)
    except ValueError:
        # The regexp is lenient and also admits strings such as "1.2.3" or
        # "."; report them like any other malformed input
        raise errors.UnitParseError("Invalid format")

    if unit:
        lcunit = unit.lower()
    else:
        lcunit = "m"

    if lcunit in ("m", "mb", "mib"):
        # Value already in MiB
        pass

    elif lcunit in ("g", "gb", "gib"):
        value *= 1024

    elif lcunit in ("t", "tb", "tib"):
        value *= 1024 * 1024

    else:
        raise errors.UnitParseError("Unknown unit: %s" % unit)

    # Make sure we round up
    if int(value) < value:
        value += 1

    # Round up to the next multiple of 4
    value = int(value)
    if value % 4:
        value += 4 - value % 4

    return value
def ShellQuote(value):
    """Quotes shell argument according to POSIX.

    @type value: str
    @param value: the argument to be quoted
    @rtype: str
    @return: the quoted value; arguments made up only of characters which
        need no quoting are returned unchanged

    """
    if _SHELL_UNQUOTED_RE.match(value):
        return value

    # Inside single quotes nothing is special except the single quote itself,
    # which is emitted by closing the quotes, adding an escaped quote and
    # opening the quotes again
    return "'%s'" % value.replace("'", "'\\''")
def ShellQuoteArgs(args):
    """Quotes every argument of a list for use in a shell command.

    @type args: list
    @param args: list of arguments to be quoted
    @rtype: str
    @return: the quoted arguments concatenated with spaces

    """
    quoted = map(ShellQuote, args)
    return " ".join(quoted)
243 """Helper class to write scripts with indentation.
def __init__(self, fh, indent=True):
    """Initializes this class.

    @param fh: file-like object receiving the output; only its C{write}
        method is used
    @type indent: boolean
    @param indent: whether written lines are prefixed with the current
        indentation

    """
    self._fh = fh
    self._indent_enabled = indent
    # Current indentation level; never negative
    self._indent = 0
257 """Increase indentation level by 1.
263 """Decrease indentation level by 1.
266 assert self._indent > 0
def Write(self, txt, *args):
    """Write line to output file.

    @param txt: the line to write; used as a format string when C{args}
        are given
    @param args: optional values interpolated into C{txt} with C{%}

    """
    assert self._indent >= 0

    # Only interpolate when arguments were passed, so that a plain line
    # containing "%" is written unchanged
    if args:
        line = txt % args
    else:
        line = txt

    if line and self._indent_enabled:
        # Indent only if there's something on the line
        self._fh.write(self._indent * self.INDENT_STR)

    self._fh.write(line)
    self._fh.write("\n")
def GenerateSecret(numbytes=20):
    """Generates a random secret.

    This will generate a random secret, read from C{os.urandom}, returning
    an hex string (so that it can be used where an ASCII string is needed).

    @type numbytes: int
    @param numbytes: the number of bytes which will be represented by the
        returned string (defaulting to 20, the length of a SHA1 hash)
    @rtype: str
    @return: an hex representation of the random sequence, twice as many
        characters as C{numbytes}

    """
    # Imported locally; the "hex" codec used previously only exists for
    # Python 2 byte strings
    import binascii

    hexed = binascii.hexlify(os.urandom(numbytes))
    if not isinstance(hexed, str):
        # Python 3 returns bytes; callers expect a native string
        hexed = hexed.decode("ascii")
    return hexed
def _MakeMacAddrRegexp(octets):
    """Builds a regular expression for verifying MAC addresses.

    @type octets: integer
    @param octets: How many octets to expect (1-6)
    @return: Compiled regular expression

    """
    assert octets > 0
    assert octets <= 6

    # \Z instead of "$": "$" would also accept an address followed by a
    # newline. Case is ignored as addresses are normalized to lower case
    # after validation.
    return re.compile(r"^%s\Z" % ":".join([_MAC_ADDR_OCTET_RE] * octets),
                      re.IGNORECASE)


#: Regular expression for full MAC address
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)

#: Regular expression for half a MAC address
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)
def _MacAddressCheck(check_re, mac, msg):
    """Checks a MAC address using a regular expression.

    @param check_re: Compiled regular expression as returned by C{re.compile}
    @type mac: string
    @param mac: MAC address to be validated
    @type msg: string
    @param msg: Error message (%s will be replaced with MAC address)
    @rtype: string
    @return: the MAC address converted to lower case
    @raise errors.OpPrereqError: if C{mac} doesn't match C{check_re}

    """
    if check_re.match(mac):
        return mac.lower()

    raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)
def NormalizeAndValidateMac(mac):
    """Validates a full (six octet) MAC address and normalizes it.

    The supplied MAC address must be formally correct; only the
    colon-separated format is accepted. The result is normalized to all
    lower case.

    @type mac: string
    @param mac: MAC address to be validated
    @rtype: string
    @return: Normalized and validated MAC address
    @raise errors.OpPrereqError: If the MAC address isn't valid

    """
    error_template = "Invalid MAC address '%s'"
    return _MacAddressCheck(_MAC_CHECK_RE, mac, error_template)
def NormalizeAndValidateThreeOctetMacPrefix(mac):
    """Validates a three octet MAC address prefix and normalizes it.

    The supplied string must be a valid MAC address prefix made up of three
    colon-separated octets. The result is normalized to all lower case.

    @type mac: string
    @param mac: Prefix to be validated
    @rtype: string
    @return: Normalized and validated prefix
    @raise errors.OpPrereqError: If the MAC address prefix isn't valid

    """
    error_template = "Invalid MAC address prefix '%s'"
    return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, error_template)
def SafeEncode(text):
    """Return a 'safe' version of a source string.

    This function mangles the input string and returns a version that
    should be safe to display/encode as ASCII. To this end, we first
    convert it to ASCII using the 'backslashreplace' encoding which
    should get rid of any non-ASCII chars, and then we process it
    through a loop modelled on the string repr sources in Python; we
    don't use string_escape since that escapes single quotes and
    backslashes too, and that is too much; and that escaping is not
    stable, i.e. string_escape(string_escape(x)) != string_escape(x).

    @type text: str or unicode
    @param text: input data
    @rtype: str
    @return: a safe version of text

    """
    try:
        unicode_type = unicode  # Python 2
    except NameError:
        unicode_type = str  # Python 3: every text string is unicode

    if isinstance(text, unicode_type):
        # only if unicode; byte strings are handled by the loop below
        text = text.encode("ascii", "backslashreplace")

    # bytearray yields integer byte values on Python 2 and 3 alike; the
    # pieces are joined once at the end instead of growing a string
    pieces = []
    for code in bytearray(text):
        if code == 0x09:
            pieces.append(r"\t")
        elif code == 0x0a:
            pieces.append(r"\n")
        elif code == 0x0d:
            pieces.append(r"\r")
        elif code < 32 or code >= 127:  # non-printable
            pieces.append("\\x%02x" % (code & 0xff))
        else:
            pieces.append(chr(code))

    return "".join(pieces)
def UnescapeAndSplit(text, sep=","):
    r"""Split and unescape a string based on a given separator.

    This function splits a string based on a separator where the
    separator itself can be escaped in order to be part of an element.
    The escaping rules are (assuming comma being the separator):
      - a plain , separates the elements
      - a sequence \\, (double backslash plus comma) is handled as a
        backslash plus a separator comma
      - a sequence \, (backslash plus comma) is handled as a
        non-separator comma

    @type text: string
    @param text: the string to split
    @type sep: string
    @param sep: the separator
    @rtype: list
    @return: a list of strings

    """
    merged = []
    # Element whose trailing backslash escaped the separator following it,
    # waiting to be joined with the next piece
    pending = None

    # we split the list by sep (with no escaping at this stage)
    for piece in text.split(sep):
        if pending is not None:
            piece = pending + sep + piece
            pending = None

        # An odd number of trailing backslashes means the separator after
        # this piece was escaped. The joined piece is checked again, as the
        # part just added may itself end with backslashes.
        num_b = len(piece) - len(piece.rstrip("\\"))
        if num_b % 2 == 1:
            pending = piece
        else:
            merged.append(piece)

    if pending is not None:
        # Dangling escape at the very end of the input; nothing to join
        merged.append(pending)

    # here the backslashes remain (all), and are reduced now by replacing
    # backslash-something with something
    return [re.sub(r"\\(.)", r"\1", v) for v in merged]
def CommaJoin(names):
    """Joins a collection of identifiers into a readable string.

    @param names: set, list or tuple
    @return: a string with the formatted results, i.e. the string form of
        every item separated by a comma and a space

    """
    as_text = map(str, names)
    return ", ".join(as_text)
def FormatTime(val, usecs=None):
    """Formats a time value.

    @type val: float or None
    @param val: Timestamp as returned by time.time() (seconds since Epoch,
        1970-01-01 00:00:00 UTC)
    @type usecs: int or None
    @param usecs: optional microseconds, appended as a six-digit fraction
    @rtype: string
    @return: a string value (local time) or N/A if we don't have a valid
        timestamp

    """
    if val is None or not isinstance(val, (int, float)):
        return "N/A"

    # Spelled-out equivalent of "%F %T"; those two shorthand codes work on
    # Linux but are not guaranteed on all platforms
    result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

    if usecs is not None:
        result += ".%06d" % usecs

    return result
def FormatSeconds(secs):
    """Formats seconds for easier reading.

    @type secs: number
    @param secs: Number of seconds
    @rtype: string
    @return: Formatted seconds (e.g. "2d 9h 19m 49s")

    """
    parts = []

    secs = round(secs, 0)

    if secs > 0:
        # Negative values would be a bit tricky
        for unit, one in [("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)]:
            (complete, secs) = divmod(secs, one)
            # Once a larger unit was printed, smaller ones are shown even
            # if they are zero
            if complete or parts:
                parts.append("%d%s" % (complete, unit))

    parts.append("%ds" % secs)

    return " ".join(parts)
514 """Splits data chunks into lines separated by newline.
516 Instances provide a file-like interface.
def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
        # The extra arguments follow the line, so functools.partial (which
        # binds leading arguments) is not a drop-in replacement here
        self._line_fn = \
            lambda line: line_fn(line, *args)  # pylint: disable=W0142
    else:
        self._line_fn = line_fn

    # Complete lines waiting to be handed to the callback
    self._lines = collections.deque()
    # Data received after the last newline
    self._buffer = ""
def write(self, data):
    """Buffers a chunk of data, queueing each line it completes.

    @param data: text to add; may contain any number of newlines

    """
    pieces = (self._buffer + data).split("\n")
    # The final piece has not been terminated by a newline yet, so it
    # becomes the new buffer
    self._buffer = pieces.pop()
    self._lines.extend(pieces)
546 self._line_fn(self._lines.popleft().rstrip("\r\n"))
551 self._line_fn(self._buffer)
def IsValidShellParam(word):
    """Checks whether the given word is safe from the shell's p.o.v.

    A safe word can be passed to a command via the shell without altering
    the command line; it is handed to the actual command unchanged.

    The check is deliberately more restrictive than strictly needed, in
    order to be on the safe side.

    @type word: str
    @param word: the word to check
    @rtype: boolean
    @return: True if the word is 'safe'

    """
    return _SHELLPARAM_REGEX.match(word) is not None
def BuildShellCmd(template, *args):
    """Build a safe shell command line from the given arguments.

    This function will check all arguments in the args list so that they
    are valid shell parameters (i.e. they don't contain shell
    metacharacters). If everything is ok, it will return the result of
    template % args.

    @type template: str
    @param template: the string holding the template for the
        to-be-executed command
    @param args: values interpolated into C{template}; each one must pass
        L{IsValidShellParam}
    @rtype: str
    @return: the expanded command line
    @raise errors.ProgrammerError: if an argument contains characters which
        are not considered safe

    """
    for word in args:
        if not IsValidShellParam(word):
            raise errors.ProgrammerError("Shell argument '%s' contains"
                                         " invalid characters" % word)
    return template % args
def FormatOrdinal(value):
    """Formats a number as an ordinal in the English language.

    E.g. the number 1 becomes "1st", 22 becomes "22nd".

    @type value: integer
    @param value: Number
    @rtype: string
    @return: the number followed by its English ordinal suffix

    """
    # Numbers ending in 11 to 19 always take "th" (11th, 112th, 1013th);
    # looking at the last two digits covers values above 100 too
    if 10 < value % 100 < 20:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(value % 10, "th")

    return "%s%s" % (value, suffix)
def Truncate(text, length):
    """Truncate string and add ellipsis if needed.

    @type text: string
    @param text: Text; other objects are converted with C{str} first
    @type length: integer
    @param length: Desired length
    @rtype: string
    @return: Truncated text, at most C{length} characters long including
        the ellipsis

    """
    assert length > len(_ASCII_ELLIPSIS)

    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str

    # Serialize if necessary
    if not isinstance(text, string_types):
        text = str(text)

    if len(text) <= length:
        return text

    # Leave room for the ellipsis so the result is exactly "length" long
    return text[:length - len(_ASCII_ELLIPSIS)] + _ASCII_ELLIPSIS
def FilterEmptyLinesAndComments(text):
    """Drops empty lines and comments from a line-based string.

    Leading and trailing whitespace is stripped from every line before it
    is examined, and the stripped form is what gets returned.

    @type text: string
    @param text: Input string
    @rtype: list
    @return: the remaining lines, in their original order

    """
    kept = []
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        # Ignore empty lines and comments
        if stripped and not stripped.startswith("#"):
            kept.append(stripped)
    return kept
def FormatKeyValue(data):
    """Renders a dictionary as a list of "key=value" parameters.

    Items are sorted so that the resulting order is stable.

    @type data: dict
    @param data: dictionary to format
    @rtype: list of string

    """
    ordered_items = sorted(data.items())
    return ["%s=%s" % item for item in ordered_items]