Statistics
| Branch: | Tag: | Revision:

root / lib / utils / text.py @ c85b15c1

History | View | Annotate | Download (14.2 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Utility functions for manipulating or working with text.
22

23
"""
24

    
25

    
26
import re
27
import os
28
import time
29
import collections
30

    
31
from ganeti import errors
32

    
33

    
34
#: Unit checker regexp: digits and dots, optional whitespace, optional
#: alphabetic unit. Kept lenient on purpose ("$" tolerates one trailing
#: newline); the numeric part is validated by float() in the caller.
_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")

#: Characters which don't need to be quoted for shell commands. Anchored with
#: "\Z" instead of "$", as "$" would also accept a value ending in a newline.
_SHELL_UNQUOTED_RE = re.compile(r"^[-.,=:/_+@A-Za-z0-9]+\Z")

#: MAC checker regexp (six colon-separated hex octets, case-insensitive);
#: "\Z" rejects a trailing newline
_MAC_CHECK_RE = re.compile(r"^([0-9a-f]{2}:){5}[0-9a-f]{2}\Z", re.I)

#: Shell param checker regexp; "\Z" rejects a trailing newline
_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+\Z")
45

    
46

    
47
def MatchNameComponent(key, name_list, case_sensitive=True):
  """Try to match a name against a list.

  This function will try to match a name like test1 against a list
  like C{['test1.example.com', 'test2.example.com', ...]}. Against
  this list, I{'test1'} as well as I{'test1.example'} will match, but
  not I{'test1.ex'}. A multiple match will be considered as no match
  at all (e.g. I{'test1'} against C{['test1.example.com',
  'test1.example.org']}), except when the key fully matches an entry
  (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).

  @type key: str
  @param key: the name to be searched
  @type name_list: list
  @param name_list: the list of strings against which to search the key
  @type case_sensitive: boolean
  @param case_sensitive: whether to provide a case-sensitive match

  @rtype: None or str
  @return: None if there is no match I{or} if there are multiple matches,
      otherwise the element from the list which matches

  """
  # A literal hit always wins, whatever else shares the same prefix
  if key in name_list:
    return key

  if case_sensitive:
    re_flags = 0
  else:
    re_flags = re.IGNORECASE
    key = key.upper()

  # The key must be followed by a dot-separated remainder or nothing at all
  name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags)

  prefix_matches = [name for name in name_list
                    if name_re.match(name) is not None]

  if not case_sensitive:
    # Entries equal to the key except for case; a single one of those is
    # preferred over mere prefix matches
    full_matches = [name for name in prefix_matches if name.upper() == key]
    if len(full_matches) == 1:
      return full_matches[0]

  if len(prefix_matches) == 1:
    return prefix_matches[0]

  return None
94

    
95

    
96
def _DnsNameGlobHelper(match):
  """Helper function for L{DnsNameGlobPattern}.

  Returns regular expression pattern for parts of the pattern.

  @param match: regular expression match object covering either a single
      wildcard character or a run of literal characters
  @rtype: string
  @return: regular expression fragment equivalent to the matched text

  """
  text = match.group(0)

  if text == "*":
    # Any number of characters, but never across a DNS label boundary
    return "[^.]*"

  if text == "?":
    # Exactly one character which is not a label separator
    return "[^.]"

  # Literal text, escaped so that e.g. dots are not treated as wildcards
  return re.escape(text)


def DnsNameGlobPattern(pattern):
  """Generates regular expression from DNS name globbing pattern.

  A DNS name globbing pattern (e.g. C{*.site}) is converted to a regular
  expression. Escape sequences or ranges (e.g. [a-z]) are not supported.

  Matching always starts at the leftmost part. An asterisk (*) matches all
  characters except the dot (.) separating DNS name parts. A question mark (?)
  matches a single character except the dot (.).

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  # Tokenize into single wildcards and non-empty runs of literal text; the
  # trailing group allows further DNS labels after the matched part
  translated = re.sub(r"\*|\?|[^*?]+", _DnsNameGlobHelper, pattern)
  return r"^%s(\..*)?$" % translated
129

    
130

    
131
def FormatUnit(value, units):
  """Formats an incoming number of MiB with the appropriate unit.

  @type value: int
  @param value: integer representing the value in MiB (1048576)
  @type units: char
  @param units: the type of formatting we should do:
      - 'h' for automatic scaling
      - 'm' for MiBs
      - 'g' for GiBs
      - 't' for TiBs
  @rtype: str
  @return: the formatted value (with suffix)
  @raise errors.ProgrammerError: if C{units} is not one of the above

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  # Only automatic scaling appends a suffix; with an explicit unit the
  # caller already knows the scale
  auto = (units == "h")

  if units == "m" or (auto and value < 1024):
    text = "%d" % round(value, 0)
    suffix = "M"

  elif units == "g" or (auto and value < (1024 * 1024)):
    text = "%0.1f" % round(float(value) / 1024, 1)
    suffix = "G"

  else:
    text = "%0.1f" % round(float(value) / 1024 / 1024, 1)
    suffix = "T"

  if auto:
    return text + suffix

  return text
165

    
166

    
167
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Return value
  is always an int in MiB.

  @param input_string: value to parse; converted with C{str()} first
  @rtype: int
  @return: the value in MiB, rounded up to the next multiple of 4
  @raise errors.UnitParseError: if the format, the number or the unit
      is invalid

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regular expression accepts any mix of digits and dots (e.g.
    # "1.2.3" or "."); report those like every other parse failure
    raise errors.UnitParseError("Invalid number: %s" % number)

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
210

    
211

    
212
def ShellQuote(value):
  """Quotes shell argument according to POSIX.

  Values consisting only of characters matched by the module's "unquoted"
  regular expression are returned unchanged; everything else (including
  the empty string) is wrapped in single quotes.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if _SHELL_UNQUOTED_RE.match(value):
    return value

  # Inside single quotes nothing can be escaped, hence every embedded quote
  # becomes: close quote, backslash-escaped quote, reopen quote
  return "'%s'" % value.replace("'", "'\\''")
225

    
226

    
227
def ShellQuoteArgs(args):
  """Quotes a list of shell arguments.

  Each argument is quoted individually using L{ShellQuote}.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the quoted arguments concatenated with spaces

  """
  quoted = [ShellQuote(arg) for arg in args]
  return " ".join(quoted)
237

    
238

    
239
class ShellWriter:
  """Helper class to write scripts with indentation.

  Every call to L{Write} emits one line, prefixed with the current
  indentation (a multiple of L{INDENT_STR}).

  """
  #: String written once per indentation level
  INDENT_STR = "  "

  def __init__(self, fh):
    """Initializes this class.

    @param fh: object with a C{write} method receiving the output

    """
    self._fh = fh
    self._indent = 0

  def IncIndent(self):
    """Increase indentation level by 1.

    """
    self._indent += 1

  def DecIndent(self):
    """Decrease indentation level by 1.

    The level must be greater than zero (checked by an assertion).

    """
    assert self._indent > 0
    self._indent -= 1

  def Write(self, txt, *args):
    """Write line to output file.

    @type txt: str
    @param txt: line to write; used as a format string if C{args} is given
    @param args: optional values interpolated into C{txt} using C{%}

    """
    assert self._indent >= 0

    if args:
      line = txt % args
    else:
      line = txt

    # A single write per line: if formatting fails nothing is written, so
    # the output never ends up with a dangling indentation prefix
    self._fh.write("%s%s\n" % (self._indent * self.INDENT_STR, line))
279

    
280

    
281
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  This will generate a pseudo-random secret returning an hex string
  (so that it can be used where an ASCII string is needed).

  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: an hex representation of the pseudo-random sequence

  """
  # Imported here to keep this function self-contained
  import binascii

  # The "hex" codec only exists for Python 2 strings; hexlify works on every
  # Python version. Decoding plus str() yields the native string type on
  # both Python 2 and Python 3.
  return str(binascii.hexlify(os.urandom(numbytes)).decode("ascii"))
294

    
295

    
296
def NormalizeAndValidateMac(mac):
  """Normalizes and check if a MAC address is valid.

  Checks whether the supplied MAC address is formally correct, only
  accepts colon separated format. Normalize it to all lower.

  @type mac: str
  @param mac: the MAC to be validated
  @rtype: str
  @return: returns the normalized and validated MAC.

  @raise errors.OpPrereqError: If the MAC isn't valid

  """
  if _MAC_CHECK_RE.match(mac):
    # The check is case-insensitive; the canonical form is lower case
    return mac.lower()

  raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
                             errors.ECODE_INVAL)
315

    
316

    
317
def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. To this end, we first
  convert it to ASCII using the 'backslashreplace' encoding which
  should get rid of any non-ASCII chars, and then we process it
  through a loop copied from the string repr sources in the python; we
  don't use string_escape anymore since that escape single quotes and
  backslashes too, and that is too much; and that escaping is not
  stable, i.e. string_escape(string_escape(x)) != string_escape(x).

  Tab, newline and carriage return become C{\\t}, C{\\n} and C{\\r}; other
  characters outside the printable ASCII range become C{\\xNN}.

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  # type(u"") is "unicode" on Python 2 and "str" on Python 3, which avoids
  # referencing a name that only exists on Python 2
  if isinstance(text, type(u"")):
    # only if unicode; if a byte string already, we handle it below
    text = text.encode("ascii", "backslashreplace")

  named_escapes = {
    9: r"\t",
    10: r"\n",
    13: r"\r",
    }

  pieces = []
  for char in text:
    # Iterating over a byte string yields integers on Python 3 and
    # one-character strings on Python 2
    if isinstance(char, int):
      code = char
    else:
      code = ord(char)

    if code in named_escapes:
      pieces.append(named_escapes[code])
    elif code < 32 or code >= 127: # non-printable
      pieces.append("\\x%02x" % code)
    else:
      pieces.append(chr(code))

  return "".join(pieces)
352

    
353

    
354
def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  This function splits a string based on a separator where the
  separator itself can be escaped in order to be part of an
  element. The escaping rules are (assuming comma being the
  separator):
    - a plain , separates the elements
    - a sequence \\, (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  # First split by sep with no regard to escaping, then glue back together
  # the pieces whose separator turns out to have been escaped
  rlist = []
  current = None
  for piece in text.split(sep):
    if current is None:
      current = piece
    else:
      current = current + sep + piece

    # An odd number of trailing backslashes means the last one escapes the
    # separator that followed, so the element continues with the next piece
    num_b = len(current) - len(current.rstrip("\\"))
    if num_b % 2 == 1:
      continue

    # here the backslashes remain (all), and will be reduced in the next step
    rlist.append(current)
    current = None

  if current is not None:
    # Input ended with an escaping backslash and nothing left to join
    rlist.append(current)

  # finally, replace backslash-something with something
  return [re.sub(r"\\(.)", r"\1", v) for v in rlist]
396

    
397

    
398
def CommaJoin(names):
  """Nicely join a set of identifiers.

  Every element is converted with C{str()} and the results are joined
  using a comma followed by a space.

  @param names: set, list or tuple
  @return: a string with the formatted results

  """
  return ", ".join(str(val) for val in names)
406

    
407

    
408
def FormatTime(val):
  """Formats a time value.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @return: a string value or N/A if we don't have a valid timestamp; valid
    timestamps are formatted in local time as C{YYYY-MM-DD HH:MM:SS}

  """
  # None is not an int or float either, so a single check covers it
  if not isinstance(val, (int, float)):
    return "N/A"

  # Spelled-out equivalent of "%F %T"; those shortcuts are not supported by
  # strftime on every platform
  return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))
422

    
423

    
424
def FormatSeconds(secs):
  """Formats seconds for easier reading.

  The value is rounded to whole seconds first. Days, hours and minutes are
  only shown from the largest non-zero unit downwards.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  units = [
    ("d", 24 * 60 * 60),
    ("h", 60 * 60),
    ("m", 60),
    ]

  remaining = round(secs, 0)
  parts = []

  # Negative values would be a bit tricky, they are shown as plain seconds
  if remaining > 0:
    for (suffix, length) in units:
      (complete, remaining) = divmod(remaining, length)
      # Once a larger unit was shown, smaller ones are shown even if zero
      if complete or parts:
        parts.append("%d%s" % (complete, suffix))

  parts.append("%ds" % remaining)

  return " ".join(parts)
447

    
448

    
449
class LineSplitter:
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface.

  Data passed to L{write} is only collected; the line function is invoked
  from L{flush} (complete lines) and L{close} (complete lines plus any
  unterminated rest).

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines waiting to be passed to the line function
    self._lines = collections.deque()
    # Data received after the last newline
    self._buffer = ""

  def write(self, data):
    """Adds data, queueing every line completed by it.

    @type data: str
    @param data: chunk of data, may contain any number of newlines

    """
    parts = (self._buffer + data).split("\n")
    # The last part is either empty (data ended with a newline) or the
    # beginning of a line which is not complete yet
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Passes all queued complete lines to the line function.

    Trailing carriage returns are removed from each line.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes complete lines and emits the unterminated rest, if any.

    """
    self.flush()
    if self._buffer:
      # Clear the buffer first so a repeated close() doesn't emit the same
      # data again
      (rest, self._buffer) = (self._buffer, "")
      self._line_fn(rest)
488

    
489

    
490
def IsValidShellParam(word):
  """Verifies is the given word is safe from the shell's p.o.v.

  This means that we can pass this to a command via the shell and be
  sure that it doesn't alter the command line and is passed as such to
  the actual command.

  Note that we are overly restrictive here, in order to be on the safe
  side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
507

    
508

    
509
def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  This function will check all arguments in the args list so that they
  are valid shell parameters (i.e. they don't contain shell
  metacharacters). If everything is ok, it will return the result of
  template % args.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: values interpolated into the template; each one is checked
      with L{IsValidShellParam}
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: if an argument is not a valid shell
      parameter

  """
  # Validate everything before formatting, so that no partially checked
  # command line is ever built
  for word in args:
    if not IsValidShellParam(word):
      raise errors.ProgrammerError("Shell argument '%s' contains"
                                   " invalid characters" % word)

  return template % args
529

    
530

    
531
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd", 111 becomes "111th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: the number followed by its ordinal suffix

  """
  # The "teens" always take "th", in every hundred (11th, 112th, 1013th),
  # hence the check on the last two digits rather than on the value itself
  if 10 < value % 100 < 20:
    suffix = "th"
  else:
    suffix = {
      1: "st",
      2: "nd",
      3: "rd",
      }.get(value % 10, "th")

  return "%s%s" % (value, suffix)