Statistics
| Branch: | Tag: | Revision:

root / lib / utils / algo.py @ 9c007da8

History | View | Annotate | Download (2.7 kB)

1 4fd029cf Michael Hanselmann
#
2 4fd029cf Michael Hanselmann
#
3 4fd029cf Michael Hanselmann
4 4fd029cf Michael Hanselmann
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5 4fd029cf Michael Hanselmann
#
6 4fd029cf Michael Hanselmann
# This program is free software; you can redistribute it and/or modify
7 4fd029cf Michael Hanselmann
# it under the terms of the GNU General Public License as published by
8 4fd029cf Michael Hanselmann
# the Free Software Foundation; either version 2 of the License, or
9 4fd029cf Michael Hanselmann
# (at your option) any later version.
10 4fd029cf Michael Hanselmann
#
11 4fd029cf Michael Hanselmann
# This program is distributed in the hope that it will be useful, but
12 4fd029cf Michael Hanselmann
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 4fd029cf Michael Hanselmann
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 4fd029cf Michael Hanselmann
# General Public License for more details.
15 4fd029cf Michael Hanselmann
#
16 4fd029cf Michael Hanselmann
# You should have received a copy of the GNU General Public License
17 4fd029cf Michael Hanselmann
# along with this program; if not, write to the Free Software
18 4fd029cf Michael Hanselmann
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 4fd029cf Michael Hanselmann
# 02110-1301, USA.
20 4fd029cf Michael Hanselmann
21 4fd029cf Michael Hanselmann
"""Utility functions with algorithms.
22 4fd029cf Michael Hanselmann

23 4fd029cf Michael Hanselmann
"""
24 4fd029cf Michael Hanselmann
25 4fd029cf Michael Hanselmann
import re
26 4fd029cf Michael Hanselmann
27 4fd029cf Michael Hanselmann
28 4fd029cf Michael Hanselmann
_SORTER_RE = re.compile("^%s(.*)$" % (8 * "(\D+|\d+)?"))
29 4fd029cf Michael Hanselmann
_SORTER_DIGIT = re.compile("^\d+$")
30 4fd029cf Michael Hanselmann
31 4fd029cf Michael Hanselmann
32 4fd029cf Michael Hanselmann
def UniqueSequence(seq):
33 4fd029cf Michael Hanselmann
  """Returns a list with unique elements.
34 4fd029cf Michael Hanselmann

35 4fd029cf Michael Hanselmann
  Element order is preserved.
36 4fd029cf Michael Hanselmann

37 4fd029cf Michael Hanselmann
  @type seq: sequence
38 4fd029cf Michael Hanselmann
  @param seq: the sequence with the source elements
39 4fd029cf Michael Hanselmann
  @rtype: list
40 4fd029cf Michael Hanselmann
  @return: list of unique elements from seq
41 4fd029cf Michael Hanselmann

42 4fd029cf Michael Hanselmann
  """
43 4fd029cf Michael Hanselmann
  seen = set()
44 4fd029cf Michael Hanselmann
  return [i for i in seq if i not in seen and not seen.add(i)]
45 4fd029cf Michael Hanselmann
46 4fd029cf Michael Hanselmann
47 4fd029cf Michael Hanselmann
def FindDuplicates(seq):
48 4fd029cf Michael Hanselmann
  """Identifies duplicates in a list.
49 4fd029cf Michael Hanselmann

50 4fd029cf Michael Hanselmann
  Does not preserve element order.
51 4fd029cf Michael Hanselmann

52 4fd029cf Michael Hanselmann
  @type seq: sequence
53 4fd029cf Michael Hanselmann
  @param seq: Sequence with source elements
54 4fd029cf Michael Hanselmann
  @rtype: list
55 4fd029cf Michael Hanselmann
  @return: List of duplicate elements from seq
56 4fd029cf Michael Hanselmann

57 4fd029cf Michael Hanselmann
  """
58 4fd029cf Michael Hanselmann
  dup = set()
59 4fd029cf Michael Hanselmann
  seen = set()
60 4fd029cf Michael Hanselmann
61 4fd029cf Michael Hanselmann
  for item in seq:
62 4fd029cf Michael Hanselmann
    if item in seen:
63 4fd029cf Michael Hanselmann
      dup.add(item)
64 4fd029cf Michael Hanselmann
    else:
65 4fd029cf Michael Hanselmann
      seen.add(item)
66 4fd029cf Michael Hanselmann
67 4fd029cf Michael Hanselmann
  return list(dup)
68 4fd029cf Michael Hanselmann
69 4fd029cf Michael Hanselmann
70 4fd029cf Michael Hanselmann
def _NiceSortTryInt(val):
71 4fd029cf Michael Hanselmann
  """Attempts to convert a string to an integer.
72 4fd029cf Michael Hanselmann

73 4fd029cf Michael Hanselmann
  """
74 4fd029cf Michael Hanselmann
  if val and _SORTER_DIGIT.match(val):
75 4fd029cf Michael Hanselmann
    return int(val)
76 4fd029cf Michael Hanselmann
  else:
77 4fd029cf Michael Hanselmann
    return val
78 4fd029cf Michael Hanselmann
79 4fd029cf Michael Hanselmann
80 4fd029cf Michael Hanselmann
def _NiceSortKey(value):
81 4fd029cf Michael Hanselmann
  """Extract key for sorting.
82 4fd029cf Michael Hanselmann

83 4fd029cf Michael Hanselmann
  """
84 4fd029cf Michael Hanselmann
  return [_NiceSortTryInt(grp)
85 4fd029cf Michael Hanselmann
          for grp in _SORTER_RE.match(value).groups()]
86 4fd029cf Michael Hanselmann
87 4fd029cf Michael Hanselmann
88 4fd029cf Michael Hanselmann
def NiceSort(values, key=None):
89 4fd029cf Michael Hanselmann
  """Sort a list of strings based on digit and non-digit groupings.
90 4fd029cf Michael Hanselmann

91 4fd029cf Michael Hanselmann
  Given a list of names C{['a1', 'a10', 'a11', 'a2']} this function
92 4fd029cf Michael Hanselmann
  will sort the list in the logical order C{['a1', 'a2', 'a10',
93 4fd029cf Michael Hanselmann
  'a11']}.
94 4fd029cf Michael Hanselmann

95 4fd029cf Michael Hanselmann
  The sort algorithm breaks each name in groups of either only-digits
96 4fd029cf Michael Hanselmann
  or no-digits. Only the first eight such groups are considered, and
97 4fd029cf Michael Hanselmann
  after that we just use what's left of the string.
98 4fd029cf Michael Hanselmann

99 4fd029cf Michael Hanselmann
  @type values: list
100 4fd029cf Michael Hanselmann
  @param values: the names to be sorted
101 4fd029cf Michael Hanselmann
  @type key: callable or None
102 4fd029cf Michael Hanselmann
  @param key: function of one argument to extract a comparison key from each
103 4fd029cf Michael Hanselmann
    list element, must return string
104 4fd029cf Michael Hanselmann
  @rtype: list
105 4fd029cf Michael Hanselmann
  @return: a copy of the name list sorted with our algorithm
106 4fd029cf Michael Hanselmann

107 4fd029cf Michael Hanselmann
  """
108 4fd029cf Michael Hanselmann
  if key is None:
109 4fd029cf Michael Hanselmann
    keyfunc = _NiceSortKey
110 4fd029cf Michael Hanselmann
  else:
111 4fd029cf Michael Hanselmann
    keyfunc = lambda value: _NiceSortKey(key(value))
112 4fd029cf Michael Hanselmann
113 4fd029cf Michael Hanselmann
  return sorted(values, key=keyfunc)