Statistics
| Branch: | Tag: | Revision:

root / lib / utils / text.py @ ea0f78c8

History | View | Annotate | Download (14.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Utility functions for manipulating or working with text.
22

23
"""
24

    
25

    
26
import re
27
import os
28
import time
29
import collections
30

    
31
from ganeti import errors
32

    
33

    
34
#: Unit checker regexp
_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")

# The three checker regexps below are anchored with "\Z" instead of "$":
# "$" also matches right before a trailing newline, which would let a value
# ending in a newline pass these validity checks.

#: Characters which don't need to be quoted for shell commands
_SHELL_UNQUOTED_RE = re.compile(r"^[-.,=:/_+@A-Za-z0-9]+\Z")

#: MAC checker regexp
_MAC_CHECK_RE = re.compile(r"^([0-9a-f]{2}:){5}[0-9a-f]{2}\Z", re.I)

#: Shell param checker regexp
_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+\Z")

#: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026)
_ASCII_ELLIPSIS = "..."
48

    
49

    
50
def MatchNameComponent(key, name_list, case_sensitive=True):
  """Look up a (possibly shortened) name in a list of full names.

  An entry is a candidate if it equals the key or consists of the key
  followed by a dot and arbitrary further text. With the list
  C{['test1.example.com', 'test2.example.com']}, both I{'test1'} and
  I{'test1.example'} select the first entry, whereas I{'test1.ex'} selects
  nothing. If more than one entry is a candidate the lookup fails (e.g.
  I{'test1'} against C{['test1.example.com', 'test1.example.org']}), unless
  the key is itself an entry of the list (e.g. I{'test1'} against
  C{['test1', 'test1.example.com']}).

  @type key: str
  @param key: the name to look up
  @type name_list: list
  @param name_list: the strings to search in
  @type case_sensitive: boolean
  @param case_sensitive: whether the comparison should honour case

  @rtype: None or str
  @return: the single matching list element, or None if nothing matched
      or the match was ambiguous

  """
  # A literal hit always wins, regardless of the case-sensitivity setting
  if key in name_list:
    return key

  if case_sensitive:
    flags = 0
  else:
    flags = re.IGNORECASE
    key = key.upper()

  matcher = re.compile(r"^%s(\..*)?$" % re.escape(key), flags).match
  candidates = [name for name in name_list if matcher(name) is not None]

  # Entries equal to the key apart from case; only relevant (and only
  # collected) for case-insensitive lookups
  if case_sensitive:
    exact = []
  else:
    exact = [name for name in candidates if name.upper() == key]

  # A unique whole-name match takes precedence over a unique prefix match
  for group in (exact, candidates):
    if len(group) == 1:
      return group[0]

  return None
97

    
98

    
99
def _DnsNameGlobHelper(match):
  """Translate one piece of a DNS glob for L{DnsNameGlobPattern}.

  The matched piece is either a single wildcard character or a run of
  ordinary text. Wildcards are turned into a regular expression fragment
  which never crosses a dot; ordinary text is escaped so it is matched
  literally.

  @param match: regular expression match object covering the piece
  @rtype: string
  @return: regular expression fragment for the piece

  """
  piece = match.group(0)

  wildcard = {
    "*": "[^.]*",
    "?": "[^.]",
    }.get(piece)

  if wildcard is None:
    return re.escape(piece)

  return wildcard
113

    
114

    
115
def DnsNameGlobPattern(pattern):
  """Convert a DNS name globbing pattern into a regular expression.

  A globbing pattern such as C{*.site} is translated piece by piece using
  L{_DnsNameGlobHelper}. Only two wildcards exist: an asterisk (*) stands
  for any number of characters other than the dot (.) which separates DNS
  name parts, and a question mark (?) stands for exactly one character
  other than the dot. Escape sequences and ranges (e.g. [a-z]) are not
  supported.

  The resulting expression is anchored at the leftmost part of the name and
  optionally allows further dot-separated parts after the pattern.

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  # Tokenize into single wildcards and runs of literal text
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

  return r"^%s(\..*)?$" % translated
132

    
133

    
134
def FormatUnit(value, units):
  """Render a size given in MiB using the requested unit.

  @type value: int
  @param value: the size, expressed in MiB (1048576 bytes)
  @type units: char
  @param units: how the value should be rendered:
      - 'h' scales automatically and appends the unit letter (M, G or T)
      - 'm' renders MiB, without suffix
      - 'g' renders GiB with one decimal, without suffix
      - 't' renders TiB with one decimal, without suffix
  @rtype: str
  @return: the formatted value (with suffix when scaling automatically)
  @raise errors.ProgrammerError: if C{units} is not one of the above

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  autoscale = (units == "h")

  if units == "m" or (autoscale and value < 1024):
    (amount, numfmt, letter) = (round(value, 0), "%d", "M")
  elif units == "g" or (autoscale and value < (1024 * 1024)):
    (amount, numfmt, letter) = (round(float(value) / 1024, 1), "%0.1f", "G")
  else:
    (amount, numfmt, letter) = \
      (round(float(value) / 1024 / 1024, 1), "%0.1f", "T")

  # The unit letter is only shown when the scale was chosen automatically
  if not autoscale:
    letter = ""

  return (numfmt + "%s") % (amount, letter)
168

    
169

    
170
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Recognized units
  (case-insensitive) are C{m/mb/mib}, C{g/gb/gib} and C{t/tb/tib}.

  The return value is always an int in MiB, rounded up to the next
  multiple of 4.

  @param input_string: the value to parse; it is converted using C{str}
  @rtype: int
  @return: the size in MiB
  @raise errors.UnitParseError: if the input is malformed or uses an
      unknown unit

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regexp accepts any mix of digits and dots (e.g. "1.2.3" or "."),
    # which is not necessarily a valid number; report it as a parse error
    # instead of leaking ValueError to the caller
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
213

    
214

    
215
def ShellQuote(value):
  """Quote a single shell argument according to POSIX rules.

  Values made up exclusively of characters known to be harmless are
  returned untouched. Everything else is wrapped in single quotes, with
  embedded single quotes rewritten so they survive the quoting.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if not _SHELL_UNQUOTED_RE.match(value):
    # Close the quoted section, emit an escaped quote, reopen the section
    escaped = value.replace("'", "'\\''")
    return "'%s'" % escaped

  return value
228

    
229

    
230
def ShellQuoteArgs(args):
  """Quote every argument of a list and join them into one string.

  Each argument is quoted individually using L{ShellQuote}.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the quoted arguments, separated by single spaces

  """
  quoted = map(ShellQuote, args)
  return " ".join(quoted)
240

    
241

    
242
class ShellWriter:
  """Line-oriented writer for scripts which keeps track of indentation.

  """
  #: Text emitted once per indentation level
  INDENT_STR = "  "

  def __init__(self, fh):
    """Sets up the writer.

    @param fh: object with a C{write} method receiving the output

    """
    self._indent = 0
    self._fh = fh

  def IncIndent(self):
    """Go one indentation level deeper.

    """
    self._indent = self._indent + 1

  def DecIndent(self):
    """Go one indentation level back.

    The current level must be greater than zero.

    """
    assert self._indent > 0
    self._indent = self._indent - 1

  def Write(self, txt, *args):
    """Emit one line, followed by a newline.

    @param txt: the line, or a format string if C{args} are given
    @param args: optional values interpolated into C{txt} using C{%}

    """
    assert self._indent >= 0

    line = txt
    if args:
      line = txt % args

    emit = self._fh.write

    # Empty lines are not indented, to avoid trailing whitespace
    if line:
      emit(self.INDENT_STR * self._indent)

    emit(line)
    emit("\n")
286

    
287

    
288
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  This reads C{numbytes} bytes from C{os.urandom} and returns them as a
  hex string (so that it can be used where an ASCII string is needed).
  The result is therefore twice as long as C{numbytes}.

  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: an hex representation of the random sequence

  """
  # Imported locally so this function does not depend on changes to the
  # module's import block
  import binascii

  # The "hex" string codec only exists on Python 2; hexlify works everywhere
  hexdata = binascii.hexlify(os.urandom(numbytes))

  if not isinstance(hexdata, str):
    # Python 3 returns bytes; hand out the native string type instead
    hexdata = hexdata.decode("ascii")

  return hexdata
301

    
302

    
303
def NormalizeAndValidateMac(mac):
  """Validate a MAC address and return its normalized form.

  Only the colon-separated notation is accepted. A valid address is
  normalized by converting it to lower case.

  @type mac: str
  @param mac: the MAC to be validated
  @rtype: str
  @return: the validated MAC, in lower case

  @raise errors.OpPrereqError: If the MAC isn't valid

  """
  if _MAC_CHECK_RE.match(mac):
    return mac.lower()

  raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
                             errors.ECODE_INVAL)
322

    
323

    
324
def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. To this end, text (unicode)
  input is first converted to ASCII using the 'backslashreplace' error
  handler, which gets rid of any non-ASCII characters. The resulting byte
  string is then processed byte by byte: tab, newline and carriage return
  are replaced by their usual two-character backslash escapes, all other
  non-printable bytes by a backslash-x escape with two hex digits, and
  everything else is copied unchanged.

  The 'string_escape' codec is not used since it escapes single quotes and
  backslashes too, and that is too much; and that escaping is not
  stable, i.e. string_escape(string_escape(x)) != string_escape(x).

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  try:
    text_type = unicode # pylint: disable=E0602
  except NameError:
    # Python 3 has no separate unicode type, str is the text type
    text_type = str

  if isinstance(text, text_type):
    # only text needs encoding; byte strings are handled below as they are
    text = text.encode("ascii", "backslashreplace")

  pieces = []
  for item in text:
    # Iterating over a byte string yields one-character strings on Python 2
    # but integers on Python 3; normalize to the numeric byte value
    if isinstance(item, int):
      code = item
    else:
      code = ord(item)

    if code == 0x09:
      pieces.append(r"\t")
    elif code == 0x0a:
      pieces.append(r"\n")
    elif code == 0x0d:
      pieces.append(r"\r")
    elif code < 32 or code >= 127: # non-printable
      pieces.append("\\x%02x" % (code & 0xff))
    else:
      pieces.append(chr(code))

  return "".join(pieces)
359

    
360

    
361
def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  This function splits a string based on a separator where the
  separator itself can be escaped in order to be part of an
  element. The escaping rules are (assuming comma being the
  separator):
    - a plain , separates the elements
    - a sequence \\, (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  rlist = []

  # Piece ending in an odd number of backslashes, i.e. one whose trailing
  # separator was escaped and which must be glued to the following piece
  pending = None

  # we split the text by sep (with no escaping at this stage) and re-join
  # the pieces which were separated at an escaped separator
  for piece in text.split(sep):
    if pending is not None:
      piece = pending + sep + piece
      pending = None

    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 1:
      # The merged piece is checked again on the next iteration, as it may
      # itself end with an escaped separator
      pending = piece
    else:
      # here the backslashes remain (all), and will be reduced below
      rlist.append(piece)

  if pending is not None:
    # Nothing left to merge with, keep the piece as it is
    rlist.append(pending)

  # finally, replace backslash-something with something
  return [re.sub(r"\\(.)", r"\1", v) for v in rlist]
403

    
404

    
405
def CommaJoin(names):
  """Join identifiers into a human-readable, comma-separated string.

  Every element is converted using C{str} before being joined.

  @param names: set, list or tuple
  @return: a string with the formatted results

  """
  as_text = map(str, names)
  return ", ".join(as_text)
413

    
414

    
415
def FormatTime(val, usecs=None):
  """Formats a time value.

  The timestamp is rendered in local time as C{YYYY-MM-DD HH:MM:SS}; if
  microseconds are given, they are appended as six digits after a dot.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @type usecs: int or None
  @param usecs: Microseconds to append to the result, if any
  @return: a string value or N/A if we don't have a valid timestamp

  """
  # None is not a number either, hence it is covered by this check
  if not isinstance(val, (int, float)):
    return "N/A"

  # Spelled out equivalent of "%F %T"; those two shortcuts are extensions
  # which are not guaranteed to be available on all platforms
  result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

  if usecs is not None:
    result += ".%06d" % usecs

  return result
435

    
436

    
437
def FormatSeconds(secs):
  """Render a duration in seconds as days, hours, minutes and seconds.

  The value is rounded to whole seconds first. Larger units are only shown
  from the first non-zero one onwards; seconds are always shown.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  chunks = []

  # Negative values would be a bit tricky, they're shown as plain seconds
  if remaining > 0:
    for (suffix, length) in (("d", 86400), ("h", 3600), ("m", 60)):
      (count, remaining) = divmod(remaining, length)
      if chunks or count:
        chunks.append("%d%s" % (count, suffix))

  chunks.append("%ds" % remaining)

  return " ".join(chunks)
460

    
461

    
462
class LineSplitter:
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface: data is fed in using L{write},
  complete lines are handed to the line function by L{flush}, and L{close}
  additionally hands over a trailing line lacking its newline.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines which were not yet passed to the line function
    self._lines = collections.deque()
    # Data received after the last newline
    self._buffer = ""

  def write(self, data):
    """Adds data, queueing every line completed by it.

    @type data: string
    @param data: Chunk of data, may contain any number of newlines

    """
    parts = (self._buffer + data).split("\n")
    # Whatever follows the last newline is an incomplete line
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Passes all queued lines to the line function.

    Trailing carriage returns and newlines are stripped from each line.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes queued lines and a trailing incomplete line, if any.

    """
    self.flush()
    if self._buffer:
      # Like in flush, the data is removed before it's handed over;
      # otherwise calling close again (or writing more data) would emit
      # the same text a second time
      (leftover, self._buffer) = (self._buffer, "")
      self._line_fn(leftover)
501

    
502

    
503
def IsValidShellParam(word):
  """Check whether a word can be passed through the shell unmodified.

  A word accepted by this function can be handed to a command via the
  shell with the guarantee that it neither alters the command line nor
  reaches the actual command in a changed form.

  The check is deliberately stricter than necessary, in order to be on
  the safe side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
520

    
521

    
522
def BuildShellCmd(template, *args):
  """Expand a command line template using verified arguments only.

  Every argument is checked with L{IsValidShellParam}, i.e. it must not
  contain shell metacharacters. Once all arguments passed the check, the
  result of C{template % args} is returned.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to be interpolated into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: if an argument fails the check

  """
  for candidate in args:
    if IsValidShellParam(candidate):
      continue

    raise errors.ProgrammerError(
      "Shell argument '%s' contains invalid characters" % candidate)

  return template % args
542

    
543

    
544
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd" and 111 becomes
  "111th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: the number followed by its ordinal suffix

  """
  # The suffix depends on the digits only; without this, Python's modulo
  # would yield the wrong digits for negative numbers
  magnitude = abs(value)
  last_digit = magnitude % 10

  if 10 < magnitude % 100 < 20:
    # 11th, 12th, 13th, and likewise 111th, 212th, 1013th, ...
    suffix = "th"
  elif last_digit == 1:
    suffix = "st"
  elif last_digit == 2:
    suffix = "nd"
  elif last_digit == 3:
    suffix = "rd"
  else:
    suffix = "th"

  return "%s%s" % (value, suffix)
568

    
569

    
570
def Truncate(text, length):
  """Shorten text to a maximum length, marking the cut with an ellipsis.

  Values which are not strings are converted using C{str} first. Text
  fitting into the desired length is returned unchanged; longer text is
  cut such that the result, including the ellipsis, is exactly C{length}
  characters long.

  @type text: string
  @param text: Text
  @type length: integer
  @param length: Desired length, must exceed the length of the ellipsis
  @rtype: string
  @return: Truncated text

  """
  assert length > len(_ASCII_ELLIPSIS)

  # Serialize if necessary
  if not isinstance(text, basestring):
    text = str(text)

  if len(text) > length:
    keep = length - len(_ASCII_ELLIPSIS)
    return text[:keep] + _ASCII_ELLIPSIS

  return text