root / snf-pithos-app / pithos / api / short_url.py @ 5ec446aa
History | View | Annotate | Download (5.2 kB)
1 |
# Copyright (C) 2009 by Michael Fogleman
|
---|---|
2 |
#
|
3 |
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4 |
# of this software and associated documentation files (the "Software"), to deal
|
5 |
# in the Software without restriction, including without limitation the rights
|
6 |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7 |
# copies of the Software, and to permit persons to whom the Software is
|
8 |
# furnished to do so, subject to the following conditions:
|
9 |
#
|
10 |
# The above copyright notice and this permission notice shall be included in
|
11 |
# all copies or substantial portions of the Software.
|
12 |
#
|
13 |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14 |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15 |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16 |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17 |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18 |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19 |
# THE SOFTWARE.
|
20 |
|
21 |
'''
|
22 |
Short URL Generator
|
23 |
===================
|
24 |
|
25 |
Python implementation for generating Tiny URL- and bit.ly-like URLs.
|
26 |
|
27 |
A bit-shuffling approach is used to avoid generating consecutive, predictable
|
28 |
URLs. However, the algorithm is deterministic and will guarantee that no
|
29 |
collisions will occur.
|
30 |
|
31 |
The URL alphabet is fully customizable and may contain any number of
|
32 |
characters. By default, digits and lower-case letters are used, with
|
33 |
some removed to avoid confusion between characters like o, O and 0. The
|
34 |
default alphabet is shuffled and has a prime number of characters to further
|
35 |
improve the results of the algorithm.
|
36 |
|
37 |
The block size specifies how many bits will be shuffled. The lower BLOCK_SIZE
|
38 |
bits are reversed. Any bits higher than BLOCK_SIZE will remain as is.
|
39 |
BLOCK_SIZE of 0 will leave all bits unaffected and the algorithm will simply
|
40 |
be converting your integer to a different base.
|
41 |
|
42 |
The intended use is that incrementing, consecutive integers will be used as
|
43 |
keys to generate the short URLs. For example, when creating a new URL, the
|
44 |
unique integer ID assigned by a database could be used to generate the URL
|
45 |
by using this module. Or a simple counter may be used. As long as the same
|
46 |
integer is not used twice, the same short URL will not be generated twice.
|
47 |
|
48 |
The module supports both encoding and decoding of URLs. The min_length
|
49 |
parameter allows you to pad the URL if you want it to be a specific length.
|
50 |
|
51 |
Sample Usage:
|
52 |
|
53 |
>>> import short_url
|
54 |
>>> url = short_url.encode_url(12)
|
55 |
>>> print url
|
56 |
LhKA
|
57 |
>>> key = short_url.decode_url(url)
|
58 |
>>> print key
|
59 |
12
|
60 |
|
61 |
Use the functions in the top-level of the module to use the default encoder.
|
62 |
Otherwise, you may create your own UrlEncoder object and use its encode_url
|
63 |
and decode_url methods.
|
64 |
|
65 |
Author: Michael Fogleman
|
66 |
License: MIT
|
67 |
Link: http://code.activestate.com/recipes/576918/
|
68 |
'''
|
69 |
|
70 |
DEFAULT_ALPHABET = 'mn6j2c4rv8bpygw95z7hsdaetxuk3fq'
|
71 |
DEFAULT_BLOCK_SIZE = 24
|
72 |
MIN_LENGTH = 5
|
73 |
|
74 |
|
75 |
class UrlEncoder(object): |
76 |
def __init__(self, alphabet=DEFAULT_ALPHABET, block_size=DEFAULT_BLOCK_SIZE): |
77 |
self.alphabet = alphabet
|
78 |
self.block_size = block_size
|
79 |
self.mask = (1 << block_size) - 1 |
80 |
self.mapping = range(block_size) |
81 |
self.mapping.reverse()
|
82 |
|
83 |
def encode_url(self, n, min_length=MIN_LENGTH): |
84 |
return self.enbase(self.encode(n), min_length) |
85 |
|
86 |
def decode_url(self, n): |
87 |
return self.decode(self.debase(n)) |
88 |
|
89 |
def encode(self, n): |
90 |
return (n & ~self.mask) | self._encode(n & self.mask) |
91 |
|
92 |
def _encode(self, n): |
93 |
result = 0
|
94 |
for i, b in enumerate(self.mapping): |
95 |
if n & (1 << i): |
96 |
result |= (1 << b)
|
97 |
return result
|
98 |
|
99 |
def decode(self, n): |
100 |
return (n & ~self.mask) | self._decode(n & self.mask) |
101 |
|
102 |
def _decode(self, n): |
103 |
result = 0
|
104 |
for i, b in enumerate(self.mapping): |
105 |
if n & (1 << b): |
106 |
result |= (1 << i)
|
107 |
return result
|
108 |
|
109 |
def enbase(self, x, min_length=MIN_LENGTH): |
110 |
result = self._enbase(x)
|
111 |
padding = self.alphabet[0] * (min_length - len(result)) |
112 |
return '%s%s' % (padding, result) |
113 |
|
114 |
def _enbase(self, x): |
115 |
n = len(self.alphabet) |
116 |
if x < n:
|
117 |
return self.alphabet[x] |
118 |
return self._enbase(x / n) + self.alphabet[x % n] |
119 |
|
120 |
def debase(self, x): |
121 |
n = len(self.alphabet) |
122 |
result = 0
|
123 |
for i, c in enumerate(reversed(x)): |
124 |
result += self.alphabet.index(c) * (n ** i)
|
125 |
return result
|
126 |
|
127 |
DEFAULT_ENCODER = UrlEncoder() |
128 |
|
129 |
|
130 |
def encode(n): |
131 |
return DEFAULT_ENCODER.encode(n)
|
132 |
|
133 |
|
134 |
def decode(n): |
135 |
return DEFAULT_ENCODER.decode(n)
|
136 |
|
137 |
|
138 |
def enbase(n, min_length=MIN_LENGTH): |
139 |
return DEFAULT_ENCODER.enbase(n, min_length)
|
140 |
|
141 |
|
142 |
def debase(n): |
143 |
return DEFAULT_ENCODER.debase(n)
|
144 |
|
145 |
|
146 |
def encode_url(n, min_length=MIN_LENGTH): |
147 |
return DEFAULT_ENCODER.encode_url(n, min_length)
|
148 |
|
149 |
|
150 |
def decode_url(n): |
151 |
return DEFAULT_ENCODER.decode_url(n)
|
152 |
|
153 |
if __name__ == '__main__': |
154 |
for a in range(0, 200000, 37): |
155 |
b = encode(a) |
156 |
c = enbase(b) |
157 |
d = debase(c) |
158 |
e = decode(d) |
159 |
assert a == e
|
160 |
assert b == d
|
161 |
c = (' ' * (7 - len(c))) + c |
162 |
print '%6d %12d %s %12d %6d' % (a, b, c, d, e) |