Statistics
| Branch: | Tag: | Revision:

root / lib / utils / text.py @ 997f690f

History | View | Annotate | Download (12.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21
"""Utility functions for manipulating or working with text.
22

23
"""
24

    
25

    
26
import re
27
import os
28
import time
29
import collections
30

    
31
from ganeti import errors
32

    
33

    
34
#: Unit checker regexp
_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")

#: Characters which don't need to be quoted for shell commands; anchored
#: with \Z because "$" would also match just before a trailing newline,
#: letting a newline through unquoted
_SHELL_UNQUOTED_RE = re.compile(r"^[-.,=:/_+@A-Za-z0-9]+\Z")

#: MAC checker regexp; \Z (unlike "$") rejects a trailing newline
_MAC_CHECK_RE = re.compile(r"^([0-9a-f]{2}:){5}[0-9a-f]{2}\Z", re.I)

#: Shell param checker regexp; \Z (unlike "$") rejects a trailing newline
_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+\Z")
45

    
46

    
47
def MatchNameComponent(key, name_list, case_sensitive=True):
  """Try to match a name against a list.

  This function will try to match a name like test1 against a list
  like C{['test1.example.com', 'test2.example.com', ...]}. Against
  this list, I{'test1'} as well as I{'test1.example'} will match, but
  not I{'test1.ex'}. A multiple match will be considered as no match
  at all (e.g. I{'test1'} against C{['test1.example.com',
  'test1.example.org']}), except when the key fully matches an entry
  (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).

  @type key: str
  @param key: the name to be searched
  @type name_list: list
  @param name_list: the list of strings against which to search the key
  @type case_sensitive: boolean
  @param case_sensitive: whether to provide a case-sensitive match

  @rtype: None or str
  @return: None if there is no match I{or} if there are multiple matches,
      otherwise the element from the list which matches

  """
  # An exact (case-sensitive) hit always wins
  if key in name_list:
    return key

  re_flags = 0
  if not case_sensitive:
    re_flags |= re.IGNORECASE
    key = key.upper()

  # The key must be followed by either nothing or a dot-separated suffix;
  # the template is a raw string so that "\." reaches the regexp engine
  # as written
  name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags)

  names_filtered = []
  string_matches = []
  for name in name_list:
    if name_re.match(name) is None:
      continue
    names_filtered.append(name)
    # Remember entries equal to the key when ignoring case; a single such
    # entry takes precedence over prefix matches
    if not case_sensitive and key == name.upper():
      string_matches.append(name)

  if len(string_matches) == 1:
    return string_matches[0]
  if len(names_filtered) == 1:
    return names_filtered[0]
  return None
91

    
92

    
93
def FormatUnit(value, units):
  """Renders a size given in MiB using the requested unit.

  @type value: int
  @param value: the size, expressed in MiB
  @type units: char
  @param units: selects the output unit:
      - 'h' picks MiB, GiB or TiB automatically and appends a suffix
      - 'm' prints MiBs (no suffix)
      - 'g' prints GiBs (no suffix)
      - 't' prints TiBs (no suffix)
  @rtype: str
  @return: the formatted value (with suffix for 'h' only)
  @raise errors.ProgrammerError: if C{units} is not one of the above

  """
  if units not in ('m', 'g', 't', 'h'):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  if units == 'h':
    # Automatic scaling: choose the unit by magnitude, suffix is shown
    if value < 1024:
      scale = 'm'
    elif value < (1024 * 1024):
      scale = 'g'
    else:
      scale = 't'
    suffix = scale.upper()
  else:
    # Explicit unit: bare number
    scale = units
    suffix = ''

  if scale == 'm':
    return "%d%s" % (round(value, 0), suffix)

  if scale == 'g':
    return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)

  return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
127

    
128

    
129
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Return value
  is always an int in MiB, rounded up to the next multiple of 4.

  @param input_string: the value to parse (converted with C{str()})
  @rtype: int
  @return: the size in MiB
  @raise errors.UnitParseError: if the string is malformed, the number
      cannot be parsed or the unit is unknown

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regexp accepts any mix of digits and dots (e.g. "." or "1.2.3");
    # report those like any other malformed input
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = 'm'

  if lcunit in ('m', 'mb', 'mib'):
    # Value already in MiB
    pass

  elif lcunit in ('g', 'gb', 'gib'):
    value *= 1024

  elif lcunit in ('t', 'tb', 'tib'):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
172

    
173

    
174
def ShellQuote(value):
  """Quotes a single shell argument according to POSIX rules.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: C{value} unchanged if it matches the unquoted-characters
      regexp, otherwise C{value} wrapped in single quotes

  """
  if not _SHELL_UNQUOTED_RE.match(value):
    # Inside single quotes only the quote itself is special: close the
    # string, emit an escaped quote, and reopen it
    return "'%s'" % value.replace("'", "'\\''")

  return value
187

    
188

    
189
def ShellQuoteArgs(args):
  """Quotes every argument in a list and joins them.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the individually quoted arguments, separated by single spaces

  """
  quoted = map(ShellQuote, args)
  return " ".join(quoted)
199

    
200

    
201
class ShellWriter:
  """Helper class to write scripts with indentation.

  Each call to L{Write} emits one line, prefixed with the current
  indentation.

  """
  # Text emitted once per indentation level
  INDENT_STR = "  "

  def __init__(self, fh):
    """Initializes this class.

    @param fh: object with a C{write} method receiving the output

    """
    self._fh = fh
    self._indent = 0

  def IncIndent(self):
    """Increase indentation level by 1.

    """
    self._indent = self._indent + 1

  def DecIndent(self):
    """Decrease indentation level by 1.

    """
    assert self._indent > 0
    self._indent = self._indent - 1

  def Write(self, txt, *args):
    """Write line to output file.

    @param txt: the line, or a format string if C{args} are given
    @param args: optional values interpolated into C{txt} with C{%}

    """
    assert self._indent >= 0

    emit = self._fh.write

    # Indentation first, then the (optionally formatted) text, then EOL
    emit(self._indent * self.INDENT_STR)

    if args:
      txt = txt % args
    emit(txt)

    emit("\n")
241

    
242

    
243
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  This will generate a random secret (read from C{os.urandom})
  returning an hex string (so that it can be used where an ASCII
  string is needed).

  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: an hex representation of the random sequence, twice as
      long as C{numbytes}

  """
  # Local import so that this function does not depend on changes to the
  # module's import block
  import binascii

  # The "hex" codec used previously only exists for Python 2 byte strings;
  # hexlify is available everywhere
  secret = binascii.hexlify(os.urandom(numbytes))
  if not isinstance(secret, str):
    # Python 3 returns bytes; callers expect a native string
    secret = secret.decode("ascii")
  return secret
256

    
257

    
258
def NormalizeAndValidateMac(mac):
  """Validates a MAC address and returns it in lower case.

  Only the colon-separated notation is accepted, as checked by the
  module's MAC regexp.

  @type mac: str
  @param mac: the MAC to be validated
  @rtype: str
  @return: the validated MAC, converted to lower case

  @raise errors.OpPrereqError: If the MAC isn't valid

  """
  if _MAC_CHECK_RE.match(mac):
    return mac.lower()

  raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
                             errors.ECODE_INVAL)
277

    
278

    
279
def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. To this end, unicode
  input is first converted to ASCII using the 'backslashreplace' error
  handler, which gets rid of any non-ASCII chars, and then every byte
  is processed through a loop modelled on the string repr: tab,
  newline and carriage return become C{\\t}, C{\\n} and C{\\r}, other
  non-printable bytes become C{\\xNN}, everything else is kept.
  string_escape is not used since that escapes single quotes and
  backslashes too, and that is too much; and that escaping is not
  stable, i.e. string_escape(string_escape(x)) != string_escape(x).

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  try:
    unicode_type = unicode
  except NameError:
    # Python 3 has no separate "unicode" builtin; str is the text type
    unicode_type = str

  if isinstance(text, unicode_type):
    # only if unicode; byte strings are handled directly by the loop below
    text = text.encode("ascii", "backslashreplace")

  pieces = []
  for char in text:
    # Iterating a byte string yields 1-char strings on Python 2 and
    # integers on Python 3; normalize to (code, character)
    if isinstance(char, int):
      code = char
      char = chr(code)
    else:
      code = ord(char)

    if char == "\t":
      pieces.append(r"\t")
    elif char == "\n":
      pieces.append(r"\n")
    elif char == "\r":
      pieces.append(r"\r")
    elif code < 32 or code >= 127: # non-printable
      pieces.append("\\x%02x" % (code & 0xff))
    else:
      pieces.append(char)

  return "".join(pieces)
314

    
315

    
316
def UnescapeAndSplit(text, sep=","):
  """Split and unescape a string based on a given separator.

  This function splits a string based on a separator where the
  separator itself can be escaped in order to be part of an
  element. The escaping rules are (assuming comma being the
  separator):
    - a plain , separates the elements
    - a sequence \\\\, (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  # First split on every separator, ignoring escapes. A piece ending in an
  # odd number of backslashes had its separator escaped, so it is carried
  # over and glued (separator included) to the piece that follows.
  joined = []
  carry = None
  for piece in text.split(sep):
    if carry is not None:
      piece = carry + sep + piece
      carry = None

    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 1:
      # the merged piece is re-examined on the next iteration, as the piece
      # glued onto it may end in an escaping backslash too
      carry = piece
    else:
      joined.append(piece)

  if carry is not None:
    # trailing escape with nothing left to join; keep the piece as it is
    joined.append(carry)

  # finally, replace backslash-something with something
  return [re.sub(r"\\(.)", r"\1", v) for v in joined]
358

    
359

    
360
def CommaJoin(names):
  """Joins identifiers into a comma-separated string.

  Each item is converted with C{str()} first.

  @param names: set, list or tuple
  @return: a string with the items separated by C{", "}

  """
  as_text = map(str, names)
  return ", ".join(as_text)
368

    
369

    
370
def FormatTime(val):
  """Formats a time value.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @rtype: str
  @return: the local time formatted as C{YYYY-MM-DD HH:MM:SS}, or N/A if
    we don't have a valid timestamp

  """
  # None is not an int/float either, so one check covers both cases
  if not isinstance(val, (int, float)):
    return "N/A"

  # Spelled-out equivalent of "%F %T"; those shortcuts are not guaranteed
  # to exist on all platforms
  return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))
384

    
385

    
386
def FormatSeconds(secs):
  """Renders a duration in seconds as days, hours, minutes and seconds.

  Leading units that are zero are omitted; once a unit has been shown,
  all smaller ones are shown as well.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  chunks = []

  # Negative values would be a bit tricky, so only positive durations are
  # broken down; anything else is printed as plain seconds
  if remaining > 0:
    for (suffix, length) in (("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)):
      (count, remaining) = divmod(remaining, length)
      if chunks or count:
        chunks.append("%d%s" % (count, suffix))

  chunks.append("%ds" % remaining)

  return " ".join(chunks)
409

    
410

    
411
class LineSplitter:
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface: data passed to L{write} is
  cut at newlines, L{flush} hands the complete lines to the callback
  and L{close} additionally hands over a trailing partial line.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable-msg=W0142
    else:
      self._line_fn = line_fn

    # Complete lines waiting to be delivered by flush()
    self._lines = collections.deque()
    # Data after the last newline seen so far
    self._buffer = ""

  def write(self, data):
    """Adds data, queueing every line completed by it.

    @type data: str
    @param data: chunk of text, may contain any number of newlines

    """
    parts = (self._buffer + data).split("\n")
    # Whatever follows the last newline is an incomplete line
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Calls the line function for every queued complete line.

    Trailing CR/LF characters are stripped from each line.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes complete lines and delivers a trailing partial line.

    The partial line is passed on as is. It is cleared first, so that
    calling this method again does not deliver it a second time.

    """
    self.flush()
    if self._buffer:
      (tail, self._buffer) = (self._buffer, "")
      self._line_fn(tail)
450

    
451

    
452
def IsValidShellParam(word):
  """Checks whether the given word is safe from the shell's p.o.v.

  A safe word can be passed to a command via the shell without
  altering the command line, i.e. it reaches the actual command
  unchanged.

  Note that the check is overly restrictive, in order to be on the safe
  side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
469

    
470

    
471
def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  Every argument is checked with L{IsValidShellParam} (i.e. it must
  not contain shell metacharacters). If all of them pass, the result
  of template % args is returned.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to interpolate into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: for the first argument failing the check

  """
  for arg in args:
    if IsValidShellParam(arg):
      continue
    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % arg)

  return template % args