Statistics
| Branch: | Tag: | Revision:

root / lib / utils / text.py @ adec726e

History | View | Annotate | Download (17.4 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Utility functions for manipulating or working with text.
22

23
"""
24

    
25

    
26
import re
27
import os
28
import time
29
import collections
30

    
31
from ganeti import errors
32

    
33

    
34
#: Unit checker regexp
35
_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")
36

    
37
#: Characters which don't need to be quoted for shell commands
38
_SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$")
39

    
40
#: Shell param checker regexp
41
_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$")
42

    
43
#: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026)
44
_ASCII_ELLIPSIS = "..."
45

    
46
#: MAC address octet
47
_MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}"
48

    
49

    
50
def MatchNameComponent(key, name_list, case_sensitive=True):
  """Look up a (possibly shortened) name in a list of names.

  A key such as I{'test1'} is compared against a list like
  C{['test1.example.com', 'test2.example.com', ...]}. The key matches an
  entry if it is equal to it or if the entry continues with a dot right
  after the key, i.e. I{'test1'} and I{'test1.example'} match
  C{'test1.example.com'} while I{'test1.ex'} does not. If more than one
  entry matches (e.g. I{'test1'} against C{['test1.example.com',
  'test1.example.org']}) the result is treated as no match, unless the key
  is identical to one of the entries (e.g. I{'test1'} against C{['test1',
  'test1.example.com']}).

  @type key: str
  @param key: the name to look for
  @type name_list: list
  @param name_list: the names to compare the key against
  @type case_sensitive: boolean
  @param case_sensitive: whether upper/lower case matters when comparing

  @rtype: None or str
  @return: the matching element of the list, or None if either nothing or
      more than one element matched

  """
  # An exact (case-sensitive) hit always wins
  if key in name_list:
    return key

  if case_sensitive:
    flags = 0
  else:
    flags = re.IGNORECASE
    key = key.upper()

  matcher = re.compile(r"^%s(\..*)?$" % re.escape(key), flags)

  candidates = [name for name in name_list
                if matcher.match(name) is not None]

  # Entries equal to the key when ignoring case; only collected for
  # case-insensitive lookups
  full_matches = [name for name in candidates
                  if not case_sensitive and key == name.upper()]

  if len(full_matches) == 1:
    return full_matches[0]

  if len(candidates) == 1:
    return candidates[0]

  return None
97

    
98

    
99
def _DnsNameGlobHelper(match):
100
  """Helper function for L{DnsNameGlobPattern}.
101

102
  Returns regular expression pattern for parts of the pattern.
103

104
  """
105
  text = match.group(0)
106

    
107
  if text == "*":
108
    return "[^.]*"
109
  elif text == "?":
110
    return "[^.]"
111
  else:
112
    return re.escape(text)
113

    
114

    
115
def DnsNameGlobPattern(pattern):
  """Converts a DNS name globbing pattern into a regular expression.

  Given a globbing pattern for DNS names (e.g. C{*.site}) the text of an
  equivalent regular expression is returned. Neither escape sequences nor
  ranges (e.g. [a-z]) are supported.

  The resulting expression is anchored at the start of the name. An asterisk
  (*) stands for any number of characters other than the dot (.) separating
  DNS name parts, a question mark (?) for exactly one character other than
  the dot (.).

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  # Each wildcard and each run of literal characters is translated separately
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

  return r"^%s(\..*)?$" % translated
132

    
133

    
134
def FormatUnit(value, units):
  """Formats a number of MiB using the requested unit.

  @type value: int
  @param value: the amount to format, given in MiB (1048576 bytes)
  @type units: char
  @param units: how the value should be formatted:
      - 'h' scale automatically and append the unit as a suffix
      - 'm' print as MiB
      - 'g' print as GiB
      - 't' print as TiB
  @rtype: str
  @return: the formatted value (a suffix is only added for 'h')
  @raise errors.ProgrammerError: if an unknown unit was passed

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  if units == "h":
    # Pick the scale depending on the magnitude and show it to the user
    if value < 1024:
      scale = "m"
    elif value < (1024 * 1024):
      scale = "g"
    else:
      scale = "t"
    suffix = scale.upper()
  else:
    scale = units
    suffix = ""

  if scale == "m":
    return "%d%s" % (round(value, 0), suffix)

  if scale == "g":
    return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)

  return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
168

    
169

    
170
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Accepted units (in
  any case) are C{m}, C{mb}, C{mib}, C{g}, C{gb}, C{gib}, C{t}, C{tb} and
  C{tib}. The return value is always an int in MiB, rounded up to the next
  multiple of 4.

  @param input_string: the value to parse; converted using C{str} first
  @rtype: int
  @return: the parsed size in MiB
  @raise errors.UnitParseError: if the input is malformed or uses an
      unknown unit

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regular expression accepts any mix of digits and dots, including
    # strings such as "1.2.3" or "." which are not numbers
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
213

    
214

    
215
def ShellQuote(value):
  """Quotes a single shell argument according to POSIX.

  Arguments made up only of characters not requiring quoting are returned
  as they are.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if not _SHELL_UNQUOTED_RE.match(value):
    # Wrap in single quotes; an embedded single quote closes the quoted
    # string, adds an escaped quote and opens a new quoted string
    escaped = value.replace("'", "'\\''")
    return "'%s'" % escaped

  return value
228

    
229

    
230
def ShellQuoteArgs(args):
  """Quotes every argument of a list for use in a shell.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the individually quoted arguments, separated by single spaces

  """
  quoted = []

  for arg in args:
    quoted.append(ShellQuote(arg))

  return " ".join(quoted)
240

    
241

    
242
class ShellWriter:
  """Writes the lines of a script to a file object, handling indentation.

  """
  #: Text written once per indentation level
  INDENT_STR = "  "

  def __init__(self, fh, indent=True):
    """Initializes this class.

    @param fh: object with a C{write} method receiving the output
    @type indent: boolean
    @param indent: whether lines should be indented at all

    """
    self._fh = fh
    self._indent_enabled = indent
    self._indent = 0

  def IncIndent(self):
    """Moves one indentation level deeper.

    """
    self._indent = self._indent + 1

  def DecIndent(self):
    """Moves one indentation level back.

    """
    assert self._indent > 0
    self._indent = self._indent - 1

  def Write(self, txt, *args):
    """Writes a single line, followed by a newline, to the output file.

    @type txt: string
    @param txt: the line; used as a format string if C{args} are given
    @param args: values for the format string

    """
    assert self._indent >= 0

    line = txt
    if args:
      line = txt % args

    # Empty lines are not indented, thereby avoiding trailing whitespace
    if self._indent_enabled and line:
      self._fh.write(self.INDENT_STR * self._indent)

    self._fh.write(line)

    self._fh.write("\n")
287

    
288

    
289
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  The secret is made of bytes obtained from C{os.urandom} and returned as a
  hexadecimal string (so that it can be used where an ASCII string is
  needed).

  @type numbytes: int
  @param numbytes: the number of bytes which will be represented by the
      returned string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: a hex representation of the random sequence, twice as many
      characters as C{numbytes}

  """
  # Imported locally as the module doesn't otherwise need it; the "hex"
  # string codec used previously only exists for Python 2 strings
  import binascii

  secret = binascii.hexlify(os.urandom(numbytes))

  if not isinstance(secret, str):
    # hexlify returned a bytes object (Python 3); callers expect text
    secret = secret.decode("ascii")

  return secret
302

    
303

    
304
def _MakeMacAddrRegexp(octets):
  """Creates a regular expression matching colon-separated MAC octets.

  @type octets: integer
  @param octets: Number of octets the expression should match (1-6)
  @return: Compiled, case-insensitive regular expression

  """
  assert 0 < octets <= 6

  # One octet pattern per requested octet, joined by colons and anchored at
  # both ends
  pattern = "^%s$" % ":".join([_MAC_ADDR_OCTET_RE] * octets)

  return re.compile(pattern, re.I)
317

    
318

    
319
#: Compiled regular expression matching a complete MAC address (six octets)
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)

#: Compiled regular expression matching the first half of a MAC address
#: (three octets)
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)
324

    
325

    
326
def _MacAddressCheck(check_re, mac, msg):
327
  """Checks a MAC address using a regular expression.
328

329
  @param check_re: Compiled regular expression as returned by C{re.compile}
330
  @type mac: string
331
  @param mac: MAC address to be validated
332
  @type msg: string
333
  @param msg: Error message (%s will be replaced with MAC address)
334

335
  """
336
  if check_re.match(mac):
337
    return mac.lower()
338

    
339
  raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)
340

    
341

    
342
def NormalizeAndValidateMac(mac):
  """Validates a full (six octets) MAC address and normalizes it.

  The address must be formally correct and use the colon-separated
  notation. The returned address is all lower case.

  @type mac: string
  @param mac: MAC address to be validated
  @rtype: string
  @return: Normalized and validated MAC address
  @raise errors.OpPrereqError: If the MAC address isn't valid

  """
  error_text = "Invalid MAC address '%s'"

  return _MacAddressCheck(_MAC_CHECK_RE, mac, error_text)
356

    
357

    
358
def NormalizeAndValidateThreeOctetMacPrefix(mac):
  """Validates a three-octet MAC address prefix and normalizes it.

  The supplied string must consist of exactly three colon-separated
  octets. The returned prefix is all lower case.

  @type mac: string
  @param mac: Prefix to be validated
  @rtype: string
  @return: Normalized and validated prefix
  @raise errors.OpPrereqError: If the MAC address prefix isn't valid

  """
  error_text = "Invalid MAC address prefix '%s'"

  return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, error_text)
373

    
374

    
375
def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. Text (unicode) input is first
  converted to ASCII using the 'backslashreplace' error handler, which gets
  rid of any non-ASCII characters. The resulting bytes are then escaped one
  by one: tab, newline and carriage return become C{\\t}, C{\\n} and
  C{\\r}, any other non-printable byte becomes C{\\xNN} and everything else
  is kept as it is. The string_escape codec is not used since it escapes
  single quotes and backslashes too, and that is too much; that escaping is
  also not stable, i.e. string_escape(string_escape(x)) != string_escape(x).

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  try:
    text_type = unicode # Python 2
  except NameError:
    text_type = str # no separate unicode type, "str" is the text type

  if isinstance(text, text_type):
    # only for text; byte strings are handled as they are below
    text = text.encode("ascii", "backslashreplace")

  # Byte values with a dedicated escape sequence (tab, newline, CR)
  special = {
    9: r"\t",
    10: r"\n",
    13: r"\r",
    }

  pieces = []
  for idx in range(len(text)):
    # A one-element slice is a byte string no matter whether indexing
    # would return a character or an integer, hence ord() always works
    code = ord(text[idx:idx + 1])
    if code in special:
      pieces.append(special[code])
    elif code < 32 or code >= 127: # non-printable
      pieces.append("\\x%02x" % code)
    else:
      pieces.append(chr(code))

  return "".join(pieces)
410

    
411

    
412
def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  This function splits a string based on a separator where the
  separator itself can be escaped in order to be part of an
  element. The escaping rules are (assuming comma being the
  separator):
    - a plain , separates the elements
    - a sequence \\, (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  rlist = []

  # Element ending in an escaped separator, waiting to be joined with
  # whatever follows
  pending = None

  # we split by sep (with no escaping at this stage) and re-join the pieces
  # which were separated at an escaped separator
  for piece in text.split(sep):
    if pending is not None:
      piece = pending + sep + piece
      pending = None

    # an odd number of trailing backslashes means the separator which
    # followed was escaped; the joined text is checked again as the next
    # separator may have been escaped, too
    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 1:
      pending = piece
    else:
      # here the backslashes remain (all), and will be reduced below
      rlist.append(piece)

  if pending is not None:
    # nothing left to join with, keep the element as it is
    rlist.append(pending)

  # finally, replace backslash-something with something
  return [re.sub(r"\\(.)", r"\1", v) for v in rlist]
454

    
455

    
456
def EscapeAndJoin(slist, sep=","):
  """Encode a list in a way parsable by UnescapeAndSplit.

  Every backslash in an element is doubled and every occurrence of the
  separator within an element is prefixed with a backslash before the
  elements are joined using the separator.

  @type slist: list of strings
  @param slist: the strings to be encoded
  @type sep: string
  @param sep: the separator; always treated as literal text
  @rtype: string
  @return: the encoding of the list as a string

  """
  escaped = []

  for value in slist:
    # Backslashes must be doubled first, otherwise the ones added in front
    # of the separator would be doubled as well. Plain string replacement
    # is used so that separators such as "d" or "n" are not mistaken for
    # regular expression escapes.
    value = value.replace("\\", "\\\\")
    escaped.append(value.replace(sep, "\\" + sep))

  return sep.join(escaped)
467

    
468

    
469
def CommaJoin(names):
  """Joins a number of identifiers into a readable, comma-separated string.

  @param names: set, list or tuple
  @return: a string containing the string form of all elements, separated
      by a comma and a space

  """
  texts = []

  for item in names:
    texts.append(str(item))

  return ", ".join(texts)
477

    
478

    
479
def FormatTime(val, usecs=None):
  """Formats a time value.

  The timestamp is shown in local time as C{YYYY-MM-DD HH:MM:SS}, optionally
  followed by a dot and the microseconds as six digits.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @type usecs: int or None
  @param usecs: Microseconds to append to the formatted time, if any
  @rtype: str
  @return: a string value or N/A if we don't have a valid timestamp

  """
  # This also covers None
  if not isinstance(val, (int, float)):
    return "N/A"

  # Spelled-out equivalent of "%F %T"; those two codes work on Linux, but
  # they are not guaranteed on all platforms
  result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

  if usecs is not None:
    result += ".%06d" % usecs

  return result
499

    
500

    
501
def FormatSeconds(secs):
  """Formats a number of seconds as days, hours, minutes and seconds.

  Larger units are only shown once they are needed, e.g. 65 seconds become
  "1m 5s" and 3600 seconds become "1h 0m 0s".

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  result = []

  # Zero and negative values are only ever shown as seconds; splitting
  # negative values would be a bit tricky
  if remaining > 0:
    for (suffix, length) in (("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)):
      (count, remaining) = divmod(remaining, length)
      # Once a larger unit was shown all smaller ones are shown, too
      if result or count:
        result.append("%d%s" % (count, suffix))

  result.append("%ds" % remaining)

  return " ".join(result)
524

    
525

    
526
class LineSplitter:
  """Collects chunks of data and hands them on line by line.

  Lines are separated by newline characters. Instances provide a file-like
  interface (C{write}, C{flush} and C{close}).

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if not args:
      self._line_fn = line_fn
    else:
      # Python 2.4 doesn't have functools.partial yet
      def _CallWithArgs(line):
        return line_fn(line, *args) # pylint: disable=W0142

      self._line_fn = _CallWithArgs

    self._lines = collections.deque()
    self._buffer = ""

  def write(self, data):
    """Adds data; complete lines are queued until the next flush.

    """
    pieces = (self._buffer + data).split("\n")

    # Whatever follows the last newline is an incomplete line and kept back
    self._buffer = pieces.pop()

    self._lines.extend(pieces)

  def flush(self):
    """Calls the line function for all complete lines queued so far.

    """
    while self._lines:
      line = self._lines.popleft()
      self._line_fn(line.rstrip("\r\n"))

  def close(self):
    """Flushes complete lines and passes on a trailing incomplete line.

    """
    self.flush()

    if self._buffer:
      self._line_fn(self._buffer)
565

    
566

    
567
def IsValidShellParam(word):
  """Checks whether the given word is safe from the shell's p.o.v.

  A safe word can be passed to a command via the shell without altering
  the command line, i.e. it reaches the actual command exactly as given.

  The check is deliberately overly restrictive in order to be on the safe
  side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
584

    
585

    
586
def BuildShellCmd(template, *args):
  """Builds a safe shell command line from a template and its arguments.

  All arguments are verified to be valid shell parameters (i.e. they don't
  contain shell metacharacters). If all of them are, the result of
  template % args is returned.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to be inserted into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: if an argument isn't a valid shell
      parameter

  """
  for arg in args:
    if IsValidShellParam(arg):
      continue

    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % arg)

  return template % args
606

    
607

    
608
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd" and 111 becomes
  "111th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: The number followed by its ordinal suffix

  """
  last_digit = value % 10

  # Numbers ending in 11 to 19 always use "th", not only 11 to 19
  # themselves (e.g. 111th, 212th, 1013th)
  if 10 < value % 100 < 20:
    suffix = "th"
  elif last_digit == 1:
    suffix = "st"
  elif last_digit == 2:
    suffix = "nd"
  elif last_digit == 3:
    suffix = "rd"
  else:
    suffix = "th"

  return "%s%s" % (value, suffix)
632

    
633

    
634
def Truncate(text, length):
  """Shortens a string to a maximum length, marking the cut with an ellipsis.

  Values which are not strings are converted using C{str} first. Text not
  exceeding the desired length is returned unchanged.

  @type text: string
  @param text: Text
  @type length: integer
  @param length: Desired length; must be larger than the ellipsis
  @rtype: string
  @return: Truncated text

  """
  ellipsis_len = len(_ASCII_ELLIPSIS)

  assert length > ellipsis_len

  # Serialize if necessary
  if not isinstance(text, basestring):
    text = str(text)

  if len(text) > length:
    # Make room for the ellipsis so the result is exactly "length" long
    return text[:length - ellipsis_len] + _ASCII_ELLIPSIS

  return text
655

    
656

    
657
def FilterEmptyLinesAndComments(text):
  """Returns the lines of a string which are neither empty nor comments.

  Leading and trailing whitespace is removed from every line first. Lines
  which are empty afterwards or start with a hash sign are dropped.

  @type text: string
  @param text: Input string
  @rtype: list
  @return: The remaining lines, stripped of surrounding whitespace

  """
  result = []

  for raw_line in text.splitlines():
    line = raw_line.strip()

    # Ignore empty lines and comments
    if not line or line.startswith("#"):
      continue

    result.append(line)

  return result
670

    
671

    
672
def FormatKeyValue(data):
  """Turns a dictionary into a list of "key=value" strings.

  The entries are sorted to have a stable order.

  @type data: dict
  @param data: The dictionary to format
  @rtype: list of string
  @return: One "key=value" string per dictionary entry

  """
  result = []

  for (name, content) in sorted(data.items()):
    result.append("%s=%s" % (name, content))

  return result