root / lib / utils / algo.py @ 9c007da8
History | View | Annotate | Download (2.7 kB)
1 | 4fd029cf | Michael Hanselmann | #
|
---|---|---|---|
2 | 4fd029cf | Michael Hanselmann | #
|
3 | 4fd029cf | Michael Hanselmann | |
4 | 4fd029cf | Michael Hanselmann | # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
|
5 | 4fd029cf | Michael Hanselmann | #
|
6 | 4fd029cf | Michael Hanselmann | # This program is free software; you can redistribute it and/or modify
|
7 | 4fd029cf | Michael Hanselmann | # it under the terms of the GNU General Public License as published by
|
8 | 4fd029cf | Michael Hanselmann | # the Free Software Foundation; either version 2 of the License, or
|
9 | 4fd029cf | Michael Hanselmann | # (at your option) any later version.
|
10 | 4fd029cf | Michael Hanselmann | #
|
11 | 4fd029cf | Michael Hanselmann | # This program is distributed in the hope that it will be useful, but
|
12 | 4fd029cf | Michael Hanselmann | # WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 | 4fd029cf | Michael Hanselmann | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 | 4fd029cf | Michael Hanselmann | # General Public License for more details.
|
15 | 4fd029cf | Michael Hanselmann | #
|
16 | 4fd029cf | Michael Hanselmann | # You should have received a copy of the GNU General Public License
|
17 | 4fd029cf | Michael Hanselmann | # along with this program; if not, write to the Free Software
|
18 | 4fd029cf | Michael Hanselmann | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
19 | 4fd029cf | Michael Hanselmann | # 02110-1301, USA.
|
20 | 4fd029cf | Michael Hanselmann | |
21 | 4fd029cf | Michael Hanselmann | """Utility functions with algorithms.
|
22 | 4fd029cf | Michael Hanselmann |
|
23 | 4fd029cf | Michael Hanselmann | """
|
24 | 4fd029cf | Michael Hanselmann | |
25 | 4fd029cf | Michael Hanselmann | import re |
26 | 4fd029cf | Michael Hanselmann | |
27 | 4fd029cf | Michael Hanselmann | |
28 | 4fd029cf | Michael Hanselmann | _SORTER_RE = re.compile("^%s(.*)$" % (8 * "(\D+|\d+)?")) |
29 | 4fd029cf | Michael Hanselmann | _SORTER_DIGIT = re.compile("^\d+$")
|
30 | 4fd029cf | Michael Hanselmann | |
31 | 4fd029cf | Michael Hanselmann | |
32 | 4fd029cf | Michael Hanselmann | def UniqueSequence(seq): |
33 | 4fd029cf | Michael Hanselmann | """Returns a list with unique elements.
|
34 | 4fd029cf | Michael Hanselmann |
|
35 | 4fd029cf | Michael Hanselmann | Element order is preserved.
|
36 | 4fd029cf | Michael Hanselmann |
|
37 | 4fd029cf | Michael Hanselmann | @type seq: sequence
|
38 | 4fd029cf | Michael Hanselmann | @param seq: the sequence with the source elements
|
39 | 4fd029cf | Michael Hanselmann | @rtype: list
|
40 | 4fd029cf | Michael Hanselmann | @return: list of unique elements from seq
|
41 | 4fd029cf | Michael Hanselmann |
|
42 | 4fd029cf | Michael Hanselmann | """
|
43 | 4fd029cf | Michael Hanselmann | seen = set()
|
44 | 4fd029cf | Michael Hanselmann | return [i for i in seq if i not in seen and not seen.add(i)] |
45 | 4fd029cf | Michael Hanselmann | |
46 | 4fd029cf | Michael Hanselmann | |
47 | 4fd029cf | Michael Hanselmann | def FindDuplicates(seq): |
48 | 4fd029cf | Michael Hanselmann | """Identifies duplicates in a list.
|
49 | 4fd029cf | Michael Hanselmann |
|
50 | 4fd029cf | Michael Hanselmann | Does not preserve element order.
|
51 | 4fd029cf | Michael Hanselmann |
|
52 | 4fd029cf | Michael Hanselmann | @type seq: sequence
|
53 | 4fd029cf | Michael Hanselmann | @param seq: Sequence with source elements
|
54 | 4fd029cf | Michael Hanselmann | @rtype: list
|
55 | 4fd029cf | Michael Hanselmann | @return: List of duplicate elements from seq
|
56 | 4fd029cf | Michael Hanselmann |
|
57 | 4fd029cf | Michael Hanselmann | """
|
58 | 4fd029cf | Michael Hanselmann | dup = set()
|
59 | 4fd029cf | Michael Hanselmann | seen = set()
|
60 | 4fd029cf | Michael Hanselmann | |
61 | 4fd029cf | Michael Hanselmann | for item in seq: |
62 | 4fd029cf | Michael Hanselmann | if item in seen: |
63 | 4fd029cf | Michael Hanselmann | dup.add(item) |
64 | 4fd029cf | Michael Hanselmann | else:
|
65 | 4fd029cf | Michael Hanselmann | seen.add(item) |
66 | 4fd029cf | Michael Hanselmann | |
67 | 4fd029cf | Michael Hanselmann | return list(dup) |
68 | 4fd029cf | Michael Hanselmann | |
69 | 4fd029cf | Michael Hanselmann | |
70 | 4fd029cf | Michael Hanselmann | def _NiceSortTryInt(val): |
71 | 4fd029cf | Michael Hanselmann | """Attempts to convert a string to an integer.
|
72 | 4fd029cf | Michael Hanselmann |
|
73 | 4fd029cf | Michael Hanselmann | """
|
74 | 4fd029cf | Michael Hanselmann | if val and _SORTER_DIGIT.match(val): |
75 | 4fd029cf | Michael Hanselmann | return int(val) |
76 | 4fd029cf | Michael Hanselmann | else:
|
77 | 4fd029cf | Michael Hanselmann | return val
|
78 | 4fd029cf | Michael Hanselmann | |
79 | 4fd029cf | Michael Hanselmann | |
80 | 4fd029cf | Michael Hanselmann | def _NiceSortKey(value): |
81 | 4fd029cf | Michael Hanselmann | """Extract key for sorting.
|
82 | 4fd029cf | Michael Hanselmann |
|
83 | 4fd029cf | Michael Hanselmann | """
|
84 | 4fd029cf | Michael Hanselmann | return [_NiceSortTryInt(grp)
|
85 | 4fd029cf | Michael Hanselmann | for grp in _SORTER_RE.match(value).groups()] |
86 | 4fd029cf | Michael Hanselmann | |
87 | 4fd029cf | Michael Hanselmann | |
88 | 4fd029cf | Michael Hanselmann | def NiceSort(values, key=None): |
89 | 4fd029cf | Michael Hanselmann | """Sort a list of strings based on digit and non-digit groupings.
|
90 | 4fd029cf | Michael Hanselmann |
|
91 | 4fd029cf | Michael Hanselmann | Given a list of names C{['a1', 'a10', 'a11', 'a2']} this function
|
92 | 4fd029cf | Michael Hanselmann | will sort the list in the logical order C{['a1', 'a2', 'a10',
|
93 | 4fd029cf | Michael Hanselmann | 'a11']}.
|
94 | 4fd029cf | Michael Hanselmann |
|
95 | 4fd029cf | Michael Hanselmann | The sort algorithm breaks each name in groups of either only-digits
|
96 | 4fd029cf | Michael Hanselmann | or no-digits. Only the first eight such groups are considered, and
|
97 | 4fd029cf | Michael Hanselmann | after that we just use what's left of the string.
|
98 | 4fd029cf | Michael Hanselmann |
|
99 | 4fd029cf | Michael Hanselmann | @type values: list
|
100 | 4fd029cf | Michael Hanselmann | @param values: the names to be sorted
|
101 | 4fd029cf | Michael Hanselmann | @type key: callable or None
|
102 | 4fd029cf | Michael Hanselmann | @param key: function of one argument to extract a comparison key from each
|
103 | 4fd029cf | Michael Hanselmann | list element, must return string
|
104 | 4fd029cf | Michael Hanselmann | @rtype: list
|
105 | 4fd029cf | Michael Hanselmann | @return: a copy of the name list sorted with our algorithm
|
106 | 4fd029cf | Michael Hanselmann |
|
107 | 4fd029cf | Michael Hanselmann | """
|
108 | 4fd029cf | Michael Hanselmann | if key is None: |
109 | 4fd029cf | Michael Hanselmann | keyfunc = _NiceSortKey |
110 | 4fd029cf | Michael Hanselmann | else:
|
111 | 4fd029cf | Michael Hanselmann | keyfunc = lambda value: _NiceSortKey(key(value))
|
112 | 4fd029cf | Michael Hanselmann | |
113 | 4fd029cf | Michael Hanselmann | return sorted(values, key=keyfunc) |