4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Utility functions for manipulating or working with text.
31 from ganeti import errors
#: Unit checker regexp
_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")

# NOTE: the checks below are anchored with \Z instead of $, because $ also
# matches just before a trailing newline and would let "foo\n" through.

#: Characters which don't need to be quoted for shell commands
_SHELL_UNQUOTED_RE = re.compile(r"^[-.,=:/_+@A-Za-z0-9]+\Z")

#: MAC checker regexp
_MAC_CHECK_RE = re.compile(r"^([0-9a-f]{2}:){5}[0-9a-f]{2}\Z", re.I)

#: Shell param checker regexp
_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+\Z")
def MatchNameComponent(key, name_list, case_sensitive=True):
  """Try to match a name against a list.

  This function will try to match a name like test1 against a list
  like C{['test1.example.com', 'test2.example.com', ...]}. Against
  this list, I{'test1'} as well as I{'test1.example'} will match, but
  not I{'test1.ex'}. A multiple match will be considered as no match
  at all (e.g. I{'test1'} against C{['test1.example.com',
  'test1.example.org']}), except when the key fully matches an entry
  (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).

  @type key: str
  @param key: the name to be searched
  @type name_list: list
  @param name_list: the list of strings against which to search the key
  @type case_sensitive: boolean
  @param case_sensitive: whether to provide a case-sensitive match

  @rtype: None or str
  @return: None if there is no match I{or} if there are multiple matches,
      otherwise the element from the list which matches

  """
  # A verbatim entry always wins, even when longer names share the prefix
  if key in name_list:
    return key

  re_flags = 0
  if not case_sensitive:
    re_flags |= re.IGNORECASE
    # Upper-cased so it can be compared against name.upper() below
    key = key.upper()

  # The key must be followed by the end of the name or by a full
  # dot-separated component; a partial component does not count
  name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags)

  names_filtered = [name for name in name_list
                    if name_re.match(name) is not None]

  if case_sensitive:
    string_matches = []
  else:
    # Entries equal to the key except for case count as full matches
    string_matches = [name for name in names_filtered
                      if key == name.upper()]

  # A unique full match beats a unique prefix match; anything ambiguous
  # is treated as no match at all
  for candidates in (string_matches, names_filtered):
    if len(candidates) == 1:
      return candidates[0]

  return None
def _DnsNameGlobHelper(match):
  """Helper function for L{DnsNameGlobPattern}.

  Returns regular expression pattern for parts of the pattern.

  @param match: regular expression match object covering either a single
      wildcard character or a run of literal text
  @rtype: string
  @return: regular expression fragment equivalent to the matched text

  """
  text = match.group(0)

  if text == "*":
    # Any number of characters, but never across a DNS name part
    return "[^.]*"
  elif text == "?":
    # Exactly one character, but never the dot
    return "[^.]"
  else:
    return re.escape(text)


def DnsNameGlobPattern(pattern):
  """Generates regular expression from DNS name globbing pattern.

  A DNS name globbing pattern (e.g. C{*.site}) is converted to a regular
  expression. Escape sequences or ranges (e.g. [a-z]) are not supported.

  Matching always starts at the leftmost part. An asterisk (*) matches all
  characters except the dot (.) separating DNS name parts. A question mark (?)
  matches a single character except the dot (.).

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  # The pattern is cut into wildcards and runs of literal text; every piece
  # is translated on its own. Further dot-separated parts may follow.
  return r"^%s(\..*)?$" % re.sub(r"\*|\?|[^*?]+", _DnsNameGlobHelper, pattern)
def FormatUnit(value, units):
  """Formats an incoming number of MiB with the appropriate unit.

  @type value: int
  @param value: integer representing the value in MiB (1048576)
  @type units: char
  @param units: the type of formatting we should do:
      - 'h' for automatic scaling
      - 'm' for MiBs
      - 'g' for GiBs
      - 't' for TiBs
  @rtype: str
  @return: the formatted value (with suffix)
  @raise errors.ProgrammerError: if C{units} is not one of the above

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  # A suffix is only appended when the unit was chosen automatically
  suffix = ""

  if units == "m" or (units == "h" and value < 1024):
    if units == "h":
      suffix = "M"
    return "%d%s" % (round(value, 0), suffix)

  elif units == "g" or (units == "h" and value < (1024 * 1024)):
    if units == "h":
      suffix = "G"
    return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)

  else:
    if units == "h":
      suffix = "T"
    return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Return value
  is always an int in MiB.

  @param input_string: value to parse; it is converted using C{str()}
  @rtype: int
  @return: the size in MiB, rounded up to the next multiple of 4
  @raise errors.UnitParseError: if the input is malformed or the unit is
      not known

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regexp accepts any mix of digits and dots (e.g. "1.2.3" or "."),
    # which is not necessarily a valid number
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
def ShellQuote(value):
  """Quotes shell argument according to POSIX.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  # Strings made up of harmless characters only are passed through as-is
  if _SHELL_UNQUOTED_RE.match(value):
    return value

  # Within single quotes nothing is special except the single quote itself:
  # close the quoting, emit an escaped quote and open the quoting again
  return "'%s'" % value.replace("'", "'\\''")
def ShellQuoteArgs(args):
  """Quotes a list of shell arguments.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the quoted arguments concatenated with spaces

  """
  return " ".join(ShellQuote(arg) for arg in args)
240 """Helper class to write scripts with indentation.
245 def __init__(self, fh):
246 """Initializes this class.
253 """Increase indentation level by 1.
259 """Decrease indentation level by 1.
262 assert self._indent > 0
265 def Write(self, txt, *args):
266 """Write line to output file.
269 assert self._indent >= 0
271 self._fh.write(self._indent * self.INDENT_STR)
274 self._fh.write(txt % args)
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  This will generate a pseudo-random secret returning a hex string
  (so that it can be used where an ASCII string is needed).

  @type numbytes: int
  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: a hex representation of the pseudo-random sequence

  """
  # Imported here to keep the block self-contained; the "hex" codec of
  # str.encode() only exists on Python 2
  import binascii

  secret = binascii.hexlify(os.urandom(numbytes))
  if not isinstance(secret, str):
    # Python 3: hexlify() returns bytes, callers expect a text string
    secret = secret.decode("ascii")

  return secret
def NormalizeAndValidateMac(mac):
  """Normalizes and checks if a MAC address is valid.

  Checks whether the supplied MAC address is formally correct, only
  accepts colon separated format. Normalize it to all lower.

  @type mac: str
  @param mac: the MAC to be validated
  @rtype: str
  @return: returns the normalized and validated MAC.

  @raise errors.OpPrereqError: If the MAC isn't valid

  """
  if not _MAC_CHECK_RE.match(mac):
    # NOTE(review): the error code passed as second argument is assumed to be
    # errors.ECODE_INVAL -- confirm
    raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
                               errors.ECODE_INVAL)

  return mac.lower()
317 def SafeEncode(text):
318 """Return a 'safe' version of a source string.
320 This function mangles the input string and returns a version that
321 should be safe to display/encode as ASCII. To this end, we first
322 convert it to ASCII using the 'backslashreplace' encoding which
323 should get rid of any non-ASCII chars, and then we process it
324 through a loop copied from the string repr sources in Python; we
325 don't use string_escape anymore since that escapes single quotes and
326 backslashes too, and that is too much; and that escaping is not
327 stable, i.e. string_escape(string_escape(x)) != string_escape(x).
329 @type text: str or unicode
330 @param text: input data
332 @return: a safe version of text
335 if isinstance(text, unicode):
336 # only if unicode; if str already, we handle it below
337 text = text.encode("ascii", "backslashreplace")
347 elif c < 32 or c >= 127: # non-printable
348 resu += "\\x%02x" % (c & 0xff)
def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  This function splits a string based on a separator where the
  separator itself can be escaped in order to be an element of the
  elements. The escaping rules are (assuming comma being the
  separator):
    - a plain , separates the elements
    - a sequence \\, (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  elements = []
  # Holds an element whose trailing separator turned out to be escaped and
  # which therefore has to be glued to the piece following it
  pending = None

  # we split the list by sep (with no escaping at this stage)
  for piece in text.split(sep):
    if pending is not None:
      piece = pending + sep + piece
      pending = None

    # an odd number of trailing backslashes means the separator which
    # followed this piece was escaped; the merged piece is examined again as
    # it may itself end in backslashes
    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 1:
      pending = piece
    else:
      # here the backslashes remain (all), and will be reduced in the next
      # step
      elements.append(piece)

  if pending is not None:
    # trailing odd backslash with nothing left to merge with; keep as-is
    elements.append(pending)

  # finally, replace backslash-something with something
  return [re.sub(r"\\(.)", r"\1", v) for v in elements]
def CommaJoin(names):
  """Nicely join a set of identifiers.

  @param names: set, list or tuple
  @rtype: str
  @return: a string with the formatted results

  """
  # Items are converted with str() so that non-string values can be joined
  return ", ".join(str(val) for val in names)
409 """Formats a time value.
411 @type val: float or None
412 @param val: Timestamp as returned by time.time() (seconds since Epoch,
413 1970-01-01 00:00:00 UTC)
414 @return: a string value or N/A if we don't have a valid timestamp
417 if val is None or not isinstance(val, (int, float)):
419 # these two format codes work on Linux, but they are not guaranteed on all
421 return time.strftime("%F %T", time.localtime(val))
def FormatSeconds(secs):
  """Formats seconds for easier reading.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  parts = []

  secs = round(secs, 0)

  if secs > 0:
    # Negative values would be a bit tricky
    for unit, one in [("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)]:
      (complete, secs) = divmod(secs, one)
      # Leading zero units are left out; once a bigger unit was emitted all
      # smaller ones are shown, even if they are zero
      if complete or parts:
        parts.append("%d%s" % (complete, unit))

  parts.append("%ds" % secs)

  return " ".join(parts)
450 """Splits data chunks into lines separated by newline.
452 Instances provide a file-like interface.
455 def __init__(self, line_fn, *args):
456 """Initializes this class.
458 @type line_fn: callable
459 @param line_fn: Function called for each line, first parameter is line
460 @param args: Extra arguments for L{line_fn}
463 assert callable(line_fn)
466 # Python 2.4 doesn't have functools.partial yet
468 lambda line: line_fn(line, *args) # pylint: disable=W0142
470 self._line_fn = line_fn
472 self._lines = collections.deque()
475 def write(self, data):
476 parts = (self._buffer + data).split("\n")
477 self._buffer = parts.pop()
478 self._lines.extend(parts)
482 self._line_fn(self._lines.popleft().rstrip("\r\n"))
487 self._line_fn(self._buffer)
def IsValidShellParam(word):
  """Verifies if the given word is safe from the shell's p.o.v.

  This means that we can pass this to a command via the shell and be
  sure that it doesn't alter the command line and is passed as such to
  the called program.

  Note that we are overly restrictive here, in order to be on the safe
  side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  This function will check all arguments in the args list so that they
  are valid shell parameters (i.e. they don't contain shell
  metacharacters). If everything is ok, it will return the result of
  C{template % args}.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: values to be substituted into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: if an argument is not a valid shell
      parameter

  """
  # Only the arguments are checked; the template itself is trusted
  for word in args:
    if not IsValidShellParam(word):
      raise errors.ProgrammerError("Shell argument '%s' contains"
                                   " invalid characters" % word)
  return template % args
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: the number followed by its ordinal suffix

  """
  tens = value % 10

  # 11, 12 and 13 take "th" in every hundred (11th, 112th, 1013th), hence
  # the check on the last two digits instead of on the value itself
  if 10 < value % 100 < 20:
    suffix = "th"
  elif tens == 1:
    suffix = "st"
  elif tens == 2:
    suffix = "nd"
  elif tens == 3:
    suffix = "rd"
  else:
    suffix = "th"

  return "%s%s" % (value, suffix)