root / snf-pithos-app / pithos / api / short_url.py @ 5ec446aa
History | View | Annotate | Download (5.2 kB)
1 | bb4eafc6 | Antony Chazapis | # Copyright (C) 2009 by Michael Fogleman
|
---|---|---|---|
2 | 2715ade4 | Sofia Papagiannaki | #
|
3 | bb4eafc6 | Antony Chazapis | # Permission is hereby granted, free of charge, to any person obtaining a copy
|
4 | bb4eafc6 | Antony Chazapis | # of this software and associated documentation files (the "Software"), to deal
|
5 | bb4eafc6 | Antony Chazapis | # in the Software without restriction, including without limitation the rights
|
6 | bb4eafc6 | Antony Chazapis | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7 | bb4eafc6 | Antony Chazapis | # copies of the Software, and to permit persons to whom the Software is
|
8 | bb4eafc6 | Antony Chazapis | # furnished to do so, subject to the following conditions:
|
9 | 2715ade4 | Sofia Papagiannaki | #
|
10 | bb4eafc6 | Antony Chazapis | # The above copyright notice and this permission notice shall be included in
|
11 | bb4eafc6 | Antony Chazapis | # all copies or substantial portions of the Software.
|
12 | 2715ade4 | Sofia Papagiannaki | #
|
13 | bb4eafc6 | Antony Chazapis | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14 | bb4eafc6 | Antony Chazapis | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15 | bb4eafc6 | Antony Chazapis | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16 | bb4eafc6 | Antony Chazapis | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17 | bb4eafc6 | Antony Chazapis | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18 | bb4eafc6 | Antony Chazapis | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19 | bb4eafc6 | Antony Chazapis | # THE SOFTWARE.
|
20 | bb4eafc6 | Antony Chazapis | |
21 | bb4eafc6 | Antony Chazapis | '''
|
22 | bb4eafc6 | Antony Chazapis | Short URL Generator
|
23 | bb4eafc6 | Antony Chazapis | ===================
|
24 | bb4eafc6 | Antony Chazapis |
|
25 | bb4eafc6 | Antony Chazapis | Python implementation for generating Tiny URL- and bit.ly-like URLs.
|
26 | bb4eafc6 | Antony Chazapis |
|
27 | 2715ade4 | Sofia Papagiannaki | A bit-shuffling approach is used to avoid generating consecutive, predictable
|
28 | 2715ade4 | Sofia Papagiannaki | URLs. However, the algorithm is deterministic and will guarantee that no
|
29 | bb4eafc6 | Antony Chazapis | collisions will occur.
|
30 | bb4eafc6 | Antony Chazapis |
|
31 | 2715ade4 | Sofia Papagiannaki | The URL alphabet is fully customizable and may contain any number of
|
32 | 2715ade4 | Sofia Papagiannaki | characters. By default, digits and lower-case letters are used, with
|
33 | 2715ade4 | Sofia Papagiannaki | some removed to avoid confusion between characters like o, O and 0. The
|
34 | 2715ade4 | Sofia Papagiannaki | default alphabet is shuffled and has a prime number of characters to further
|
35 | bb4eafc6 | Antony Chazapis | improve the results of the algorithm.
|
36 | bb4eafc6 | Antony Chazapis |
|
37 | 2715ade4 | Sofia Papagiannaki | The block size specifies how many bits will be shuffled. The lower BLOCK_SIZE
|
38 | bb4eafc6 | Antony Chazapis | bits are reversed. Any bits higher than BLOCK_SIZE will remain as is.
|
39 | 2715ade4 | Sofia Papagiannaki | BLOCK_SIZE of 0 will leave all bits unaffected and the algorithm will simply
|
40 | bb4eafc6 | Antony Chazapis | be converting your integer to a different base.
|
41 | bb4eafc6 | Antony Chazapis |
|
42 | 2715ade4 | Sofia Papagiannaki | The intended use is that incrementing, consecutive integers will be used as
|
43 | 2715ade4 | Sofia Papagiannaki | keys to generate the short URLs. For example, when creating a new URL, the
|
44 | 2715ade4 | Sofia Papagiannaki | unique integer ID assigned by a database could be used to generate the URL
|
45 | 2715ade4 | Sofia Papagiannaki | by using this module. Or a simple counter may be used. As long as the same
|
46 | bb4eafc6 | Antony Chazapis | integer is not used twice, the same short URL will not be generated twice.
|
47 | bb4eafc6 | Antony Chazapis |
|
48 | 2715ade4 | Sofia Papagiannaki | The module supports both encoding and decoding of URLs. The min_length
|
49 | bb4eafc6 | Antony Chazapis | parameter allows you to pad the URL if you want it to be a specific length.
|
50 | bb4eafc6 | Antony Chazapis |
|
51 | bb4eafc6 | Antony Chazapis | Sample Usage:
|
52 | bb4eafc6 | Antony Chazapis |
|
53 | bb4eafc6 | Antony Chazapis | >>> import short_url
|
54 | bb4eafc6 | Antony Chazapis | >>> url = short_url.encode_url(12)
|
55 | bb4eafc6 | Antony Chazapis | >>> print url
|
56 | bb4eafc6 | Antony Chazapis | LhKA
|
57 | bb4eafc6 | Antony Chazapis | >>> key = short_url.decode_url(url)
|
58 | bb4eafc6 | Antony Chazapis | >>> print key
|
59 | bb4eafc6 | Antony Chazapis | 12
|
60 | bb4eafc6 | Antony Chazapis |
|
61 | 2715ade4 | Sofia Papagiannaki | Use the functions in the top-level of the module to use the default encoder.
|
62 | 2715ade4 | Sofia Papagiannaki | Otherwise, you may create your own UrlEncoder object and use its encode_url
|
63 | bb4eafc6 | Antony Chazapis | and decode_url methods.
|
64 | bb4eafc6 | Antony Chazapis |
|
65 | bb4eafc6 | Antony Chazapis | Author: Michael Fogleman
|
66 | bb4eafc6 | Antony Chazapis | License: MIT
|
67 | bb4eafc6 | Antony Chazapis | Link: http://code.activestate.com/recipes/576918/
|
68 | bb4eafc6 | Antony Chazapis | '''
|
69 | bb4eafc6 | Antony Chazapis | |
70 | bb4eafc6 | Antony Chazapis | DEFAULT_ALPHABET = 'mn6j2c4rv8bpygw95z7hsdaetxuk3fq'
|
71 | bb4eafc6 | Antony Chazapis | DEFAULT_BLOCK_SIZE = 24
|
72 | bb4eafc6 | Antony Chazapis | MIN_LENGTH = 5
|
73 | bb4eafc6 | Antony Chazapis | |
74 | 2715ade4 | Sofia Papagiannaki | |
75 | bb4eafc6 | Antony Chazapis | class UrlEncoder(object): |
76 | bb4eafc6 | Antony Chazapis | def __init__(self, alphabet=DEFAULT_ALPHABET, block_size=DEFAULT_BLOCK_SIZE): |
77 | bb4eafc6 | Antony Chazapis | self.alphabet = alphabet
|
78 | bb4eafc6 | Antony Chazapis | self.block_size = block_size
|
79 | bb4eafc6 | Antony Chazapis | self.mask = (1 << block_size) - 1 |
80 | bb4eafc6 | Antony Chazapis | self.mapping = range(block_size) |
81 | bb4eafc6 | Antony Chazapis | self.mapping.reverse()
|
82 | 2715ade4 | Sofia Papagiannaki | |
83 | bb4eafc6 | Antony Chazapis | def encode_url(self, n, min_length=MIN_LENGTH): |
84 | bb4eafc6 | Antony Chazapis | return self.enbase(self.encode(n), min_length) |
85 | 2715ade4 | Sofia Papagiannaki | |
86 | bb4eafc6 | Antony Chazapis | def decode_url(self, n): |
87 | bb4eafc6 | Antony Chazapis | return self.decode(self.debase(n)) |
88 | 2715ade4 | Sofia Papagiannaki | |
89 | bb4eafc6 | Antony Chazapis | def encode(self, n): |
90 | bb4eafc6 | Antony Chazapis | return (n & ~self.mask) | self._encode(n & self.mask) |
91 | 2715ade4 | Sofia Papagiannaki | |
92 | bb4eafc6 | Antony Chazapis | def _encode(self, n): |
93 | bb4eafc6 | Antony Chazapis | result = 0
|
94 | bb4eafc6 | Antony Chazapis | for i, b in enumerate(self.mapping): |
95 | bb4eafc6 | Antony Chazapis | if n & (1 << i): |
96 | bb4eafc6 | Antony Chazapis | result |= (1 << b)
|
97 | bb4eafc6 | Antony Chazapis | return result
|
98 | 2715ade4 | Sofia Papagiannaki | |
99 | bb4eafc6 | Antony Chazapis | def decode(self, n): |
100 | bb4eafc6 | Antony Chazapis | return (n & ~self.mask) | self._decode(n & self.mask) |
101 | 2715ade4 | Sofia Papagiannaki | |
102 | bb4eafc6 | Antony Chazapis | def _decode(self, n): |
103 | bb4eafc6 | Antony Chazapis | result = 0
|
104 | bb4eafc6 | Antony Chazapis | for i, b in enumerate(self.mapping): |
105 | bb4eafc6 | Antony Chazapis | if n & (1 << b): |
106 | bb4eafc6 | Antony Chazapis | result |= (1 << i)
|
107 | bb4eafc6 | Antony Chazapis | return result
|
108 | 2715ade4 | Sofia Papagiannaki | |
109 | bb4eafc6 | Antony Chazapis | def enbase(self, x, min_length=MIN_LENGTH): |
110 | bb4eafc6 | Antony Chazapis | result = self._enbase(x)
|
111 | bb4eafc6 | Antony Chazapis | padding = self.alphabet[0] * (min_length - len(result)) |
112 | bb4eafc6 | Antony Chazapis | return '%s%s' % (padding, result) |
113 | 2715ade4 | Sofia Papagiannaki | |
114 | bb4eafc6 | Antony Chazapis | def _enbase(self, x): |
115 | bb4eafc6 | Antony Chazapis | n = len(self.alphabet) |
116 | bb4eafc6 | Antony Chazapis | if x < n:
|
117 | bb4eafc6 | Antony Chazapis | return self.alphabet[x] |
118 | bb4eafc6 | Antony Chazapis | return self._enbase(x / n) + self.alphabet[x % n] |
119 | 2715ade4 | Sofia Papagiannaki | |
120 | bb4eafc6 | Antony Chazapis | def debase(self, x): |
121 | bb4eafc6 | Antony Chazapis | n = len(self.alphabet) |
122 | bb4eafc6 | Antony Chazapis | result = 0
|
123 | bb4eafc6 | Antony Chazapis | for i, c in enumerate(reversed(x)): |
124 | bb4eafc6 | Antony Chazapis | result += self.alphabet.index(c) * (n ** i)
|
125 | bb4eafc6 | Antony Chazapis | return result
|
126 | bb4eafc6 | Antony Chazapis | |
127 | bb4eafc6 | Antony Chazapis | DEFAULT_ENCODER = UrlEncoder() |
128 | bb4eafc6 | Antony Chazapis | |
129 | 2715ade4 | Sofia Papagiannaki | |
130 | bb4eafc6 | Antony Chazapis | def encode(n): |
131 | bb4eafc6 | Antony Chazapis | return DEFAULT_ENCODER.encode(n)
|
132 | bb4eafc6 | Antony Chazapis | |
133 | 2715ade4 | Sofia Papagiannaki | |
134 | bb4eafc6 | Antony Chazapis | def decode(n): |
135 | bb4eafc6 | Antony Chazapis | return DEFAULT_ENCODER.decode(n)
|
136 | bb4eafc6 | Antony Chazapis | |
137 | 2715ade4 | Sofia Papagiannaki | |
138 | bb4eafc6 | Antony Chazapis | def enbase(n, min_length=MIN_LENGTH): |
139 | bb4eafc6 | Antony Chazapis | return DEFAULT_ENCODER.enbase(n, min_length)
|
140 | bb4eafc6 | Antony Chazapis | |
141 | 2715ade4 | Sofia Papagiannaki | |
142 | bb4eafc6 | Antony Chazapis | def debase(n): |
143 | bb4eafc6 | Antony Chazapis | return DEFAULT_ENCODER.debase(n)
|
144 | bb4eafc6 | Antony Chazapis | |
145 | 2715ade4 | Sofia Papagiannaki | |
146 | bb4eafc6 | Antony Chazapis | def encode_url(n, min_length=MIN_LENGTH): |
147 | bb4eafc6 | Antony Chazapis | return DEFAULT_ENCODER.encode_url(n, min_length)
|
148 | bb4eafc6 | Antony Chazapis | |
149 | 2715ade4 | Sofia Papagiannaki | |
150 | bb4eafc6 | Antony Chazapis | def decode_url(n): |
151 | bb4eafc6 | Antony Chazapis | return DEFAULT_ENCODER.decode_url(n)
|
152 | bb4eafc6 | Antony Chazapis | |
153 | bb4eafc6 | Antony Chazapis | if __name__ == '__main__': |
154 | bb4eafc6 | Antony Chazapis | for a in range(0, 200000, 37): |
155 | bb4eafc6 | Antony Chazapis | b = encode(a) |
156 | bb4eafc6 | Antony Chazapis | c = enbase(b) |
157 | bb4eafc6 | Antony Chazapis | d = debase(c) |
158 | bb4eafc6 | Antony Chazapis | e = decode(d) |
159 | bb4eafc6 | Antony Chazapis | assert a == e
|
160 | bb4eafc6 | Antony Chazapis | assert b == d
|
161 | bb4eafc6 | Antony Chazapis | c = (' ' * (7 - len(c))) + c |
162 | bb4eafc6 | Antony Chazapis | print '%6d %12d %s %12d %6d' % (a, b, c, d, e) |