Revision d50ed8d4 snf-pithos-app/pithos/api/short_url.py

b/snf-pithos-app/pithos/api/short_url.py
1 1
# Copyright (C) 2009 by Michael Fogleman
2
# 
2
#
3 3
# Permission is hereby granted, free of charge, to any person obtaining a copy
4 4
# of this software and associated documentation files (the "Software"), to deal
5 5
# in the Software without restriction, including without limitation the rights
6 6
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 7
# copies of the Software, and to permit persons to whom the Software is
8 8
# furnished to do so, subject to the following conditions:
9
# 
9
#
10 10
# The above copyright notice and this permission notice shall be included in
11 11
# all copies or substantial portions of the Software.
12
# 
12
#
13 13
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 14
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 15
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
......
24 24

  
25 25
Python implementation for generating Tiny URL- and bit.ly-like URLs.
26 26

  
27
A bit-shuffling approach is used to avoid generating consecutive, predictable 
28
URLs.  However, the algorithm is deterministic and will guarantee that no 
27
A bit-shuffling approach is used to avoid generating consecutive, predictable
28
URLs.  However, the algorithm is deterministic and will guarantee that no
29 29
collisions will occur.
30 30

  
31
The URL alphabet is fully customizable and may contain any number of 
32
characters.  By default, digits and lower-case letters are used, with 
33
some removed to avoid confusion between characters like o, O and 0.  The 
34
default alphabet is shuffled and has a prime number of characters to further 
31
The URL alphabet is fully customizable and may contain any number of
32
characters.  By default, digits and lower-case letters are used, with
33
some removed to avoid confusion between characters like o, O and 0.  The
34
default alphabet is shuffled and has a prime number of characters to further
35 35
improve the results of the algorithm.
36 36

  
37
The block size specifies how many bits will be shuffled.  The lower BLOCK_SIZE 
37
The block size specifies how many bits will be shuffled.  The lower BLOCK_SIZE
38 38
bits are reversed.  Any bits higher than BLOCK_SIZE will remain as is.
39
BLOCK_SIZE of 0 will leave all bits unaffected and the algorithm will simply 
39
BLOCK_SIZE of 0 will leave all bits unaffected and the algorithm will simply
40 40
be converting your integer to a different base.
41 41

  
42
The intended use is that incrementing, consecutive integers will be used as 
43
keys to generate the short URLs.  For example, when creating a new URL, the 
44
unique integer ID assigned by a database could be used to generate the URL 
45
by using this module.  Or a simple counter may be used.  As long as the same 
42
The intended use is that incrementing, consecutive integers will be used as
43
keys to generate the short URLs.  For example, when creating a new URL, the
44
unique integer ID assigned by a database could be used to generate the URL
45
by using this module.  Or a simple counter may be used.  As long as the same
46 46
integer is not used twice, the same short URL will not be generated twice.
47 47

  
48
The module supports both encoding and decoding of URLs. The min_length 
48
The module supports both encoding and decoding of URLs. The min_length
49 49
parameter allows you to pad the URL if you want it to be a specific length.
50 50

  
51 51
Sample Usage:
......
58 58
>>> print key
59 59
12
60 60

  
61
Use the functions in the top-level of the module to use the default encoder. 
62
Otherwise, you may create your own UrlEncoder object and use its encode_url 
61
Use the functions in the top-level of the module to use the default encoder.
62
Otherwise, you may create your own UrlEncoder object and use its encode_url
63 63
and decode_url methods.
64 64

  
65 65
Author: Michael Fogleman
......
71 71
DEFAULT_BLOCK_SIZE = 24
72 72
MIN_LENGTH = 5
73 73

  
74

  
74 75
class UrlEncoder(object):
75 76
    def __init__(self, alphabet=DEFAULT_ALPHABET, block_size=DEFAULT_BLOCK_SIZE):
76 77
        self.alphabet = alphabet
......
78 79
        self.mask = (1 << block_size) - 1
79 80
        self.mapping = range(block_size)
80 81
        self.mapping.reverse()
82

  
81 83
    def encode_url(self, n, min_length=MIN_LENGTH):
82 84
        return self.enbase(self.encode(n), min_length)
85

  
83 86
    def decode_url(self, n):
84 87
        return self.decode(self.debase(n))
88

  
85 89
    def encode(self, n):
86 90
        return (n & ~self.mask) | self._encode(n & self.mask)
91

  
87 92
    def _encode(self, n):
88 93
        result = 0
89 94
        for i, b in enumerate(self.mapping):
90 95
            if n & (1 << i):
91 96
                result |= (1 << b)
92 97
        return result
98

  
93 99
    def decode(self, n):
94 100
        return (n & ~self.mask) | self._decode(n & self.mask)
101

  
95 102
    def _decode(self, n):
96 103
        result = 0
97 104
        for i, b in enumerate(self.mapping):
98 105
            if n & (1 << b):
99 106
                result |= (1 << i)
100 107
        return result
108

  
101 109
    def enbase(self, x, min_length=MIN_LENGTH):
102 110
        result = self._enbase(x)
103 111
        padding = self.alphabet[0] * (min_length - len(result))
104 112
        return '%s%s' % (padding, result)
113

  
105 114
    def _enbase(self, x):
106 115
        n = len(self.alphabet)
107 116
        if x < n:
108 117
            return self.alphabet[x]
109 118
        return self._enbase(x / n) + self.alphabet[x % n]
119

  
110 120
    def debase(self, x):
111 121
        n = len(self.alphabet)
112 122
        result = 0
......
116 126

  
117 127
DEFAULT_ENCODER = UrlEncoder()
118 128

  
129

  
119 130
def encode(n):
120 131
    return DEFAULT_ENCODER.encode(n)
121 132

  
133

  
122 134
def decode(n):
123 135
    return DEFAULT_ENCODER.decode(n)
124 136

  
137

  
125 138
def enbase(n, min_length=MIN_LENGTH):
126 139
    return DEFAULT_ENCODER.enbase(n, min_length)
127 140

  
141

  
128 142
def debase(n):
129 143
    return DEFAULT_ENCODER.debase(n)
130 144

  
145

  
131 146
def encode_url(n, min_length=MIN_LENGTH):
132 147
    return DEFAULT_ENCODER.encode_url(n, min_length)
133 148

  
149

  
134 150
def decode_url(n):
135 151
    return DEFAULT_ENCODER.decode_url(n)
136 152

  

Also available in: Unified diff