--- /dev/null
+# Copyright (C) 2009 by Michael Fogleman
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+'''
+Short URL Generator
+===================
+
+Python implementation for generating Tiny URL- and bit.ly-like URLs.
+
+A bit-shuffling approach is used to avoid generating consecutive, predictable
+URLs. However, the algorithm is deterministic and will guarantee that no
+collisions will occur.
+
+The URL alphabet is fully customizable and may contain any number of
+characters. By default, digits and lower-case letters are used, with
+some removed to avoid confusion between characters like o, O and 0. The
+default alphabet is shuffled and has a prime number of characters to further
+improve the results of the algorithm.
+
+The block size specifies how many bits will be shuffled. The lower BLOCK_SIZE
+bits are reversed. Any bits higher than BLOCK_SIZE will remain as is.
+BLOCK_SIZE of 0 will leave all bits unaffected and the algorithm will simply
+be converting your integer to a different base.
+
+The intended use is that incrementing, consecutive integers will be used as
+keys to generate the short URLs. For example, when creating a new URL, the
+unique integer ID assigned by a database could be used to generate the URL
+by using this module. Or a simple counter may be used. As long as the same
+integer is not used twice, the same short URL will not be generated twice.
+
+The module supports both encoding and decoding of URLs. The min_length
+parameter allows you to pad the URL if you want it to be a specific length.
+
+Sample Usage:
+
+>>> import short_url
+>>> url = short_url.encode_url(12)
+>>> print url
+jy7yj
+>>> key = short_url.decode_url(url)
+>>> print key
+12
+
+Use the functions in the top-level of the module to use the default encoder.
+Otherwise, you may create your own UrlEncoder object and use its encode_url
+and decode_url methods.
+
+Author: Michael Fogleman
+License: MIT
+Link: http://code.activestate.com/recipes/576918/
+'''
+
+# Digits used when writing a number as a short URL: 31 distinct lower-case
+# letters and digits, in shuffled order. The first character acts as the zero
+# digit and is therefore also the padding character.
+DEFAULT_ALPHABET = 'mn6j2c4rv8bpygw95z7hsdaetxuk3fq'
+# Number of low-order bits whose order is reversed before base conversion.
+DEFAULT_BLOCK_SIZE = 24
+# Default minimum length of a generated URL; shorter results are left-padded.
+MIN_LENGTH = 5
+
+class UrlEncoder(object):
+    """Reversible mapping between non-negative integers and short URL strings.
+
+    Encoding reverses the order of the lowest ``block_size`` bits of the
+    integer (any higher bits are kept as they are) and then writes the result
+    in base ``len(alphabet)``, using the characters of ``alphabet`` as digits.
+    Decoding undoes the two steps in the opposite order.
+    """
+
+    def __init__(self, alphabet=DEFAULT_ALPHABET,
+                 block_size=DEFAULT_BLOCK_SIZE):
+        """Create an encoder.
+
+        alphabet   -- string of digit characters; its first character is the
+                      zero digit and is used for padding.
+        block_size -- number of low-order bits to reverse (0 disables the
+                      shuffling).
+
+        Raises ValueError if the alphabet has fewer than two characters,
+        because base conversion could never terminate with such an alphabet.
+        """
+        if len(alphabet) < 2:
+            raise ValueError('alphabet must contain at least two characters')
+        self.alphabet = alphabet
+        self.block_size = block_size
+        self.mask = (1 << block_size) - 1
+        # mapping[i] is the position that source bit i moves to. It is built
+        # as a real list so it does not depend on range() returning a list.
+        self.mapping = list(range(block_size - 1, -1, -1))
+
+    def encode_url(self, n, min_length=MIN_LENGTH):
+        """Return the short URL string for the integer key n.
+
+        The result is left-padded to at least min_length characters.
+        Raises ValueError if n is negative.
+        """
+        return self.enbase(self.encode(n), min_length)
+
+    def decode_url(self, n):
+        """Return the integer key for the short URL string n.
+
+        Raises ValueError if n contains a character outside the alphabet.
+        """
+        return self.decode(self.debase(n))
+
+    def encode(self, n):
+        """Shuffle the low block_size bits of n; higher bits are unchanged."""
+        return (n & ~self.mask) | self._encode(n & self.mask)
+
+    def _encode(self, n):
+        """Move every set bit i of n to position mapping[i]."""
+        result = 0
+        for i, b in enumerate(self.mapping):
+            if n & (1 << i):
+                result |= (1 << b)
+        return result
+
+    def decode(self, n):
+        """Undo encode(): restore the original order of the low bits of n."""
+        return (n & ~self.mask) | self._decode(n & self.mask)
+
+    def _decode(self, n):
+        """Move every set bit mapping[i] of n back to position i."""
+        result = 0
+        for i, b in enumerate(self.mapping):
+            if n & (1 << b):
+                result |= (1 << i)
+        return result
+
+    def enbase(self, x, min_length=MIN_LENGTH):
+        """Write x in the alphabet's base, left-padded to min_length.
+
+        Padding uses the zero digit, so debase() ignores it.
+        Raises ValueError if x is negative.
+        """
+        result = self._enbase(x)
+        padding = self.alphabet[0] * (min_length - len(result))
+        return '%s%s' % (padding, result)
+
+    def _enbase(self, x):
+        """Return the unpadded digit string for the non-negative integer x."""
+        if x < 0:
+            # A negative value would index the alphabet from its end and
+            # yield a string that decodes to a different number.
+            raise ValueError('cannot encode a negative number: %r' % (x,))
+        n = len(self.alphabet)
+        digits = []
+        while True:
+            # Floor division keeps x an integer; digits come out least
+            # significant first.
+            x, remainder = divmod(x, n)
+            digits.append(self.alphabet[remainder])
+            if not x:
+                break
+        return ''.join(reversed(digits))
+
+    def debase(self, x):
+        """Return the integer written by the digit string x.
+
+        An empty string yields 0. Raises ValueError if x contains a
+        character that is not in the alphabet.
+        """
+        n = len(self.alphabet)
+        result = 0
+        for c in x:
+            # Horner's rule: shift the accumulated value one digit left, then
+            # add the value of the next digit.
+            result = result * n + self.alphabet.index(c)
+        return result
+
+
+# Shared encoder built with the default alphabet and block size; the
+# module-level functions below all delegate to it.
+DEFAULT_ENCODER = UrlEncoder()
+
+def encode(n):
+ """Shuffle the low bits of the integer n using the default encoder."""
+ return DEFAULT_ENCODER.encode(n)
+
+def decode(n):
+ """Undo encode() on the integer n using the default encoder."""
+ return DEFAULT_ENCODER.decode(n)
+
+def enbase(n, min_length=MIN_LENGTH):
+ """Write the integer n with the default alphabet, padded to min_length."""
+ return DEFAULT_ENCODER.enbase(n, min_length)
+
+def debase(n):
+ """Return the integer written by the default-alphabet string n."""
+ return DEFAULT_ENCODER.debase(n)
+
+def encode_url(n, min_length=MIN_LENGTH):
+ """Return the short URL string for the integer key n (default encoder)."""
+ return DEFAULT_ENCODER.encode_url(n, min_length)
+
+def decode_url(n):
+ """Return the integer key for the short URL string n (default encoder)."""
+ return DEFAULT_ENCODER.decode_url(n)
+
+if __name__ == '__main__':
+    # Round-trip self-test: every sampled key must survive
+    # encode -> enbase -> debase -> decode unchanged.
+    for a in range(0, 200000, 37):
+        b = encode(a)
+        c = enbase(b)
+        d = debase(c)
+        e = decode(d)
+        assert a == e
+        assert b == d
+        # Right-align the URL in a 7-character column for the report.
+        c = (' ' * (7 - len(c))) + c
+        # A single parenthesised argument prints the same text whether print
+        # is a statement or a function.
+        print('%6d %12d %s %12d %6d' % (a, b, c, d, e))
from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
RangeNotSatisfiable, ServiceUnavailable)
+from pithos.api.short_url import encode_url
from pithos.backends import connect_backend
from pithos.backends.base import NotAllowedError, QuotaError
def update_public_meta(public, meta):
if not public:
return
- meta['X-Object-Public'] = public
+ meta['X-Object-Public'] = '/public/' + encode_url(public)
def validate_modification_preconditions(request, meta):
"""Check that the modified timestamp conforms with the preconditions set."""
return
def get_object_public(self, user, account, container, name):
- """Return the public URL of the object if applicable.
+ """Return the public id of the object if applicable.
Raises:
NotAllowedError: Operation not permitted
"""
return []
+ def get_public(self, user, public):
+ """Return the (account, container, name) for the public id given.
+
+ Raises:
+ NotAllowedError: Operation not permitted
+
+ NameError: Public id does not exist
+ """
+ # Stub: this definition performs no lookup and always returns None.
+ return None
+
def get_block(self, hash):
"""Return a block's data.
def access_check(self, path, access, member):
"""Return true if the member has this access to the path."""
- if access == READ and self.public_check(path):
+ if access == READ and self.public_get(path) is not None:
return True
r = self.xfeature_inherit(path)
# or implied, of GRNET S.A.
from dbworker import DBWorker
-from sqlalchemy import Table, Column, String, MetaData
+from sqlalchemy import Table, Column, String, Integer, MetaData
from sqlalchemy.sql import select
+from sqlalchemy.schema import Index
+
class Public(DBWorker):
"""Paths can be marked as public."""
DBWorker.__init__(self, **params)
metadata = MetaData()
columns=[]
- columns.append(Column('path', String(2048), index=True))
- self.public = Table('public', metadata, *columns, mysql_engine='InnoDB')
+ columns.append(Column('public_id', Integer, primary_key=True))
+ columns.append(Column('path', String(2048)))
+ self.public = Table('public', metadata, *columns, mysql_engine='InnoDB', sqlite_autoincrement=True)
+ # place an index on path
+ Index('idx_public_path', self.public.c.path)
metadata.create_all(self.engine)
-
def public_set(self, path):
s = self.public.select()
s = s.where(self.public.c.path == path)
r = self.conn.execute(s)
r.close()
- def public_check(self, path):
- s = select([self.public.c.path], self.public.c.path == path)
+ def public_get(self, path):
+     """Return the public_id stored for path, or None if there is no row."""
+     query = select([self.public.c.public_id], self.public.c.path == path)
+     result = self.conn.execute(query)
+     first = result.fetchone()
+     result.close()
+     return first[0] if first else None
+
+ def public_path(self, public):
+ s = select([self.public.c.path], self.public.c.public_id == public)
r = self.conn.execute(s)
- l = r.fetchone()
+ row = r.fetchone()
r.close()
- return bool(l)
+ if row:
+ return row[0]
+ return None
def access_check(self, path, access, member):
"""Return true if the member has this access to the path."""
- if access == READ and self.public_check(path):
+ if access == READ and self.public_get(path) is not None:
return True
r = self.xfeature_inherit(path)
execute = self.execute
execute(""" create table if not exists public
- ( path text primary key ) """)
+ ( public_id integer primary key autoincrement,
+ path text ) """)
+ execute(""" create unique index if not exists idx_public_path
+ on public(path) """)
def public_set(self, path):
q = "insert or ignore into public (path) values (?)"
q = "delete from public where path = ?"
self.execute(q, (path,))
- def public_check(self, path):
- q = "select 1 from public where path = ?"
+ def public_get(self, path):
+ q = "select public_id from public where path = ?"
self.execute(q, (path,))
- return bool(self.fetchone())
+ row = self.fetchone()
+ if row:
+ return row[0]
+ return None
+
+ def public_path(self, public):
+     """Return the path stored under the given public_id, or None if absent."""
+     self.execute("select path from public where public_id = ?", (public,))
+     found = self.fetchone()
+     return found[0] if found else None
inf = float('inf')
+ULTIMATE_ANSWER = 42
+
logger = logging.getLogger(__name__)
@backend_method
def get_object_public(self, user, account, container, name):
- """Return the public URL of the object if applicable."""
+ """Return the public id of the object if applicable."""
logger.debug("get_object_public: %s %s %s", account, container, name)
self._can_read(user, account, container, name)
path = self._lookup_object(account, container, name)[0]
- if self.permissions.public_check(path):
- return '/public/' + path
- return None
+ p = self.permissions.public_get(path)
+ if p is not None:
+ p += ULTIMATE_ANSWER
+ return p
@backend_method
def update_object_public(self, user, account, container, name, public):
versions = self.node.node_get_versions(node)
return [[x[self.SERIAL], x[self.MTIME]] for x in versions if x[self.CLUSTER] != CLUSTER_DELETED]
+ @backend_method
+ def get_public(self, user, public):
+     """Return the (account, container, name) for the public id given.
+
+     Raises NameError when the id is missing, is smaller than
+     ULTIMATE_ANSWER, or has no stored path. Read permission is checked
+     through self._can_read before the result is returned.
+     """
+     logger.debug("get_public: %s", public)
+     if public is None or public < ULTIMATE_ANSWER:
+         raise NameError
+     # Public ids handed out to clients are the stored id plus
+     # ULTIMATE_ANSWER, so remove the offset before the lookup.
+     path = self.permissions.public_path(public - ULTIMATE_ANSWER)
+     if path is None:
+         # Unknown id: report it the same way as a malformed one rather
+         # than failing on path.split with an AttributeError.
+         raise NameError
+     account, container, name = path.split('/', 2)
+     self._can_read(user, account, container, name)
+     return (account, container, name)
+
@backend_method(autocommit=0)
def get_block(self, hash):
"""Return a block's data."""
from pithos.api.util import (put_object_headers, update_manifest_meta,
validate_modification_preconditions, validate_matching_preconditions,
object_data_response, api_method)
+from pithos.api.short_url import decode_url
logger = logging.getLogger(__name__)
-def object_demux(request, v_account, v_container, v_object):
+def public_demux(request, v_public):
if request.method == 'HEAD':
- return object_meta(request, v_account, v_container, v_object)
+ return public_meta(request, v_public)
elif request.method == 'GET':
- return object_read(request, v_account, v_container, v_object)
+ return public_read(request, v_public)
else:
return method_not_allowed(request)
@api_method('HEAD', user_required=False)
-def object_meta(request, v_account, v_container, v_object):
+def public_meta(request, v_public):
# Normal Response Codes: 204
# Error Response Codes: serviceUnavailable (503),
# itemNotFound (404),
# badRequest (400)
try:
+ v_account, v_container, v_object = request.backend.get_public(request.user_uniq,
+ decode_url(v_public))
meta = request.backend.get_object_meta(request.user_uniq, v_account,
- v_container, v_object)
+ v_container, v_object)
public = request.backend.get_object_public(request.user_uniq, v_account,
v_container, v_object)
except:
return response
@api_method('GET', user_required=False)
-def object_read(request, v_account, v_container, v_object):
+def public_read(request, v_public):
# Normal Response Codes: 200, 206
# Error Response Codes: serviceUnavailable (503),
# rangeNotSatisfiable (416),
# notModified (304)
try:
+ v_account, v_container, v_object = request.backend.get_public(request.user_uniq,
+ decode_url(v_public))
meta = request.backend.get_object_meta(request.user_uniq, v_account,
- v_container, v_object)
+ v_container, v_object)
public = request.backend.get_object_public(request.user_uniq, v_account,
v_container, v_object)
except:
except:
raise ItemNotFound('Object does not exist')
+ if 'Content-Disposition' not in meta:
+ name = v_object.rstrip('/').split('/')[-1]
+ if not name:
+ name = v_public
+ meta['Content-Disposition'] = 'attachment; filename=%s' % (name,)
+
return object_data_response(request, sizes, hashmaps, meta, True)
@api_method(user_required=False)
from django.conf.urls.defaults import *
-# TODO: This only works when in this order.
urlpatterns = patterns('pithos.public.functions',
(r'^$', 'method_not_allowed'),
- (r'^(?P<v_account>.+?)/(?P<v_container>.+?)/(?P<v_object>.+?)$', 'object_demux'),
- (r'^(?P<v_account>.+?)/(?P<v_container>.+?)/?$', 'method_not_allowed'),
- (r'^(?P<v_account>.+?)/?$', 'method_not_allowed')
+ (r'^(?P<v_public>.+?)/?$', 'public_demux')
)