Revision d50ed8d4 snf-pithos-app/pithos/api/short_url.py
b/snf-pithos-app/pithos/api/short_url.py | ||
---|---|---|
1 | 1 |
# Copyright (C) 2009 by Michael Fogleman |
2 |
#
|
|
2 |
# |
|
3 | 3 |
# Permission is hereby granted, free of charge, to any person obtaining a copy |
4 | 4 |
# of this software and associated documentation files (the "Software"), to deal |
5 | 5 |
# in the Software without restriction, including without limitation the rights |
6 | 6 |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
7 | 7 |
# copies of the Software, and to permit persons to whom the Software is |
8 | 8 |
# furnished to do so, subject to the following conditions: |
9 |
#
|
|
9 |
# |
|
10 | 10 |
# The above copyright notice and this permission notice shall be included in |
11 | 11 |
# all copies or substantial portions of the Software. |
12 |
#
|
|
12 |
# |
|
13 | 13 |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | 14 |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | 15 |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
... | ... | |
24 | 24 |
|
25 | 25 |
Python implementation for generating Tiny URL- and bit.ly-like URLs. |
26 | 26 |
|
27 |
A bit-shuffling approach is used to avoid generating consecutive, predictable
|
|
28 |
URLs. However, the algorithm is deterministic and will guarantee that no
|
|
27 |
A bit-shuffling approach is used to avoid generating consecutive, predictable |
|
28 |
URLs. However, the algorithm is deterministic and will guarantee that no |
|
29 | 29 |
collisions will occur. |
30 | 30 |
|
31 |
The URL alphabet is fully customizable and may contain any number of
|
|
32 |
characters. By default, digits and lower-case letters are used, with
|
|
33 |
some removed to avoid confusion between characters like o, O and 0. The
|
|
34 |
default alphabet is shuffled and has a prime number of characters to further
|
|
31 |
The URL alphabet is fully customizable and may contain any number of |
|
32 |
characters. By default, digits and lower-case letters are used, with |
|
33 |
some removed to avoid confusion between characters like o, O and 0. The |
|
34 |
default alphabet is shuffled and has a prime number of characters to further |
|
35 | 35 |
improve the results of the algorithm. |
36 | 36 |
|
37 |
The block size specifies how many bits will be shuffled. The lower BLOCK_SIZE
|
|
37 |
The block size specifies how many bits will be shuffled. The lower BLOCK_SIZE |
|
38 | 38 |
bits are reversed. Any bits higher than BLOCK_SIZE will remain as is. |
39 |
BLOCK_SIZE of 0 will leave all bits unaffected and the algorithm will simply
|
|
39 |
BLOCK_SIZE of 0 will leave all bits unaffected and the algorithm will simply |
|
40 | 40 |
be converting your integer to a different base. |
41 | 41 |
|
42 |
The intended use is that incrementing, consecutive integers will be used as
|
|
43 |
keys to generate the short URLs. For example, when creating a new URL, the
|
|
44 |
unique integer ID assigned by a database could be used to generate the URL
|
|
45 |
by using this module. Or a simple counter may be used. As long as the same
|
|
42 |
The intended use is that incrementing, consecutive integers will be used as |
|
43 |
keys to generate the short URLs. For example, when creating a new URL, the |
|
44 |
unique integer ID assigned by a database could be used to generate the URL |
|
45 |
by using this module. Or a simple counter may be used. As long as the same |
|
46 | 46 |
integer is not used twice, the same short URL will not be generated twice. |
47 | 47 |
|
48 |
The module supports both encoding and decoding of URLs. The min_length
|
|
48 |
The module supports both encoding and decoding of URLs. The min_length |
|
49 | 49 |
parameter allows you to pad the URL if you want it to be a specific length. |
50 | 50 |
|
51 | 51 |
Sample Usage: |
... | ... | |
58 | 58 |
>>> print key |
59 | 59 |
12 |
60 | 60 |
|
61 |
Use the functions in the top-level of the module to use the default encoder.
|
|
62 |
Otherwise, you may create your own UrlEncoder object and use its encode_url
|
|
61 |
Use the functions in the top-level of the module to use the default encoder. |
|
62 |
Otherwise, you may create your own UrlEncoder object and use its encode_url |
|
63 | 63 |
and decode_url methods. |
64 | 64 |
|
65 | 65 |
Author: Michael Fogleman |
... | ... | |
71 | 71 |
DEFAULT_BLOCK_SIZE = 24 |
72 | 72 |
MIN_LENGTH = 5 |
73 | 73 |
|
74 |
|
|
74 | 75 |
class UrlEncoder(object): |
75 | 76 |
def __init__(self, alphabet=DEFAULT_ALPHABET, block_size=DEFAULT_BLOCK_SIZE): |
76 | 77 |
self.alphabet = alphabet |
... | ... | |
78 | 79 |
self.mask = (1 << block_size) - 1 |
79 | 80 |
self.mapping = range(block_size) |
80 | 81 |
self.mapping.reverse() |
82 |
|
|
81 | 83 |
def encode_url(self, n, min_length=MIN_LENGTH): |
82 | 84 |
return self.enbase(self.encode(n), min_length) |
85 |
|
|
83 | 86 |
def decode_url(self, n): |
84 | 87 |
return self.decode(self.debase(n)) |
88 |
|
|
85 | 89 |
def encode(self, n): |
86 | 90 |
return (n & ~self.mask) | self._encode(n & self.mask) |
91 |
|
|
87 | 92 |
def _encode(self, n): |
88 | 93 |
result = 0 |
89 | 94 |
for i, b in enumerate(self.mapping): |
90 | 95 |
if n & (1 << i): |
91 | 96 |
result |= (1 << b) |
92 | 97 |
return result |
98 |
|
|
93 | 99 |
def decode(self, n): |
94 | 100 |
return (n & ~self.mask) | self._decode(n & self.mask) |
101 |
|
|
95 | 102 |
def _decode(self, n): |
96 | 103 |
result = 0 |
97 | 104 |
for i, b in enumerate(self.mapping): |
98 | 105 |
if n & (1 << b): |
99 | 106 |
result |= (1 << i) |
100 | 107 |
return result |
108 |
|
|
101 | 109 |
def enbase(self, x, min_length=MIN_LENGTH): |
102 | 110 |
result = self._enbase(x) |
103 | 111 |
padding = self.alphabet[0] * (min_length - len(result)) |
104 | 112 |
return '%s%s' % (padding, result) |
113 |
|
|
105 | 114 |
def _enbase(self, x): |
106 | 115 |
n = len(self.alphabet) |
107 | 116 |
if x < n: |
108 | 117 |
return self.alphabet[x] |
109 | 118 |
return self._enbase(x / n) + self.alphabet[x % n] |
119 |
|
|
110 | 120 |
def debase(self, x): |
111 | 121 |
n = len(self.alphabet) |
112 | 122 |
result = 0 |
... | ... | |
116 | 126 |
|
117 | 127 |
DEFAULT_ENCODER = UrlEncoder() |
118 | 128 |
|
129 |
|
|
119 | 130 |
def encode(n): |
120 | 131 |
return DEFAULT_ENCODER.encode(n) |
121 | 132 |
|
133 |
|
|
122 | 134 |
def decode(n): |
123 | 135 |
return DEFAULT_ENCODER.decode(n) |
124 | 136 |
|
137 |
|
|
125 | 138 |
def enbase(n, min_length=MIN_LENGTH): |
126 | 139 |
return DEFAULT_ENCODER.enbase(n, min_length) |
127 | 140 |
|
141 |
|
|
128 | 142 |
def debase(n): |
129 | 143 |
return DEFAULT_ENCODER.debase(n) |
130 | 144 |
|
145 |
|
|
131 | 146 |
def encode_url(n, min_length=MIN_LENGTH): |
132 | 147 |
return DEFAULT_ENCODER.encode_url(n, min_length) |
133 | 148 |
|
149 |
|
|
134 | 150 |
def decode_url(n): |
135 | 151 |
return DEFAULT_ENCODER.decode_url(n) |
136 | 152 |
|
Also available in: Unified diff