-# Copyright 2011 GRNET S.A. All rights reserved.
+# Copyright 2011-2012 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
from time import time
from traceback import format_exc
from wsgiref.handlers import format_date_time
-from binascii import hexlify
+from binascii import hexlify, unhexlify
+from datetime import datetime, tzinfo, timedelta
+from urllib import quote, unquote
from django.conf import settings
from django.http import HttpResponse
+from django.template.loader import render_to_string
from django.utils import simplejson as json
from django.utils.http import http_date, parse_etags
-from django.utils.encoding import smart_str
+from django.utils.encoding import smart_unicode, smart_str
+from django.core.files.uploadhandler import FileUploadHandler
+from django.core.files.uploadedfile import UploadedFile
-from pithos.api.compat import parse_http_date_safe, parse_http_date
-from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, ItemNotFound,
- Conflict, LengthRequired, PreconditionFailed, RangeNotSatisfiable,
- ServiceUnavailable)
-from pithos.backends import backend
-from pithos.backends.base import NotAllowedError
+from pithos.lib.compat import parse_http_date_safe, parse_http_date
+
+from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
+ Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
+ RangeNotSatisfiable, InternalServerError, NotImplemented)
+from pithos.api.short_url import encode_url
+from pithos.backends import connect_backend
+from pithos.backends.base import NotAllowedError, QuotaError
-import datetime
import logging
import re
import hashlib
import uuid
+import decimal
logger = logging.getLogger(__name__)
+class UTC(tzinfo):
+ """Concrete tzinfo for UTC: fixed zero offset, no DST (used by isoformat)."""
+
+ def utcoffset(self, dt):
+ return timedelta(0)
+
+ def tzname(self, dt):
+ return 'UTC'
+
+ def dst(self, dt):
+ return timedelta(0)
+
+def json_encode_decimal(obj):
+ """json.dumps default= hook: serialize decimal.Decimal as its string form.
+
+ Raises TypeError for any other type, mirroring json's own behavior.
+ """
+ if isinstance(obj, decimal.Decimal):
+ return str(obj)
+ raise TypeError(repr(obj) + " is not JSON serializable")
+
+def isoformat(d):
+ """Return an ISO8601 date string that includes a timezone."""
+
+ # NOTE(review): this tags d as UTC without converting it — assumes d is a
+ # naive datetime already expressed in UTC; confirm at call sites.
+ return d.replace(tzinfo=UTC()).isoformat()
+
def rename_meta_key(d, old, new):
if old not in d:
return
Format 'last_modified' timestamp.
"""
- d['last_modified'] = datetime.datetime.fromtimestamp(int(d['last_modified'])).isoformat()
+ if 'last_modified' in d:
+ d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
def format_header_key(k):
if '-' in n or '_' in n:
raise BadRequest('Bad characters in group name')
groups[n] = v.replace(' ', '').split(',')
- if '' in groups[n]:
+ while '' in groups[n]:
groups[n].remove('')
return meta, groups
-def put_account_headers(response, meta, groups):
+def put_account_headers(response, meta, groups, policy):
if 'count' in meta:
response['X-Account-Container-Count'] = meta['count']
if 'bytes' in meta:
k = format_header_key('X-Account-Group-' + k)
v = smart_str(','.join(v), strings_only=True)
response[k] = v
-
+ for k, v in policy.iteritems():
+ response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
+
def get_container_headers(request):
meta = get_header_prefix(request, 'X-Container-Meta-')
policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
return meta, policy
-def put_container_headers(response, meta, policy):
+def put_container_headers(request, response, meta, policy):
if 'count' in meta:
response['X-Container-Object-Count'] = meta['count']
if 'bytes' in meta:
response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
- response['X-Container-Block-Size'] = backend.block_size
- response['X-Container-Block-Hash'] = backend.hash_algorithm
+ response['X-Container-Block-Size'] = request.backend.block_size
+ response['X-Container-Block-Hash'] = request.backend.hash_algorithm
if 'until_timestamp' in meta:
response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
for k, v in policy.iteritems():
return meta, get_sharing(request), get_public(request)
def put_object_headers(response, meta, restricted=False):
- response['ETag'] = meta['hash']
+ if 'ETag' in meta:
+ response['ETag'] = meta['ETag']
response['Content-Length'] = meta['bytes']
response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
response['Last-Modified'] = http_date(int(meta['modified']))
if not restricted:
+ response['X-Object-Hash'] = meta['hash']
+ response['X-Object-UUID'] = meta['uuid']
response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
response['X-Object-Version'] = meta['version']
response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
- for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest', 'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Public'):
+ for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
+ 'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
+ 'X-Object-Public'):
if k in meta:
response[k] = smart_str(meta[k], strings_only=True)
else:
for k in ('Content-Encoding', 'Content-Disposition'):
if k in meta:
- response[k] = meta[k]
+ response[k] = smart_str(meta[k], strings_only=True)
def update_manifest_meta(request, v_account, meta):
"""Update metadata if the object has an X-Object-Manifest."""
if 'X-Object-Manifest' in meta:
- hash = ''
+ etag = ''
bytes = 0
try:
src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
- objects = backend.list_objects(request.user, v_account, src_container, prefix=src_name, virtual=False)
+ objects = request.backend.list_objects(request.user_uniq, v_account,
+ src_container, prefix=src_name, virtual=False)
for x in objects:
- src_meta = backend.get_object_meta(request.user, v_account, src_container, x[0], x[1])
- hash += src_meta['hash']
+ src_meta = request.backend.get_object_meta(request.user_uniq,
+ v_account, src_container, x[0], 'pithos', x[1])
+ if 'ETag' in src_meta:
+ etag += src_meta['ETag']
bytes += src_meta['bytes']
except:
# Ignore errors.
return
meta['bytes'] = bytes
md5 = hashlib.md5()
- md5.update(hash)
- meta['hash'] = md5.hexdigest().lower()
+ md5.update(etag)
+ meta['ETag'] = md5.hexdigest().lower()
-def update_sharing_meta(permissions, v_account, v_container, v_object, meta):
+def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
if permissions is None:
return
- perm_path, perms = permissions
+ allowed, perm_path, perms = permissions
if len(perms) == 0:
return
ret = []
meta['X-Object-Sharing'] = '; '.join(ret)
if '/'.join((v_account, v_container, v_object)) != perm_path:
meta['X-Object-Shared-By'] = perm_path
+ if request.user_uniq != v_account:
+ meta['X-Object-Allowed-To'] = allowed
def update_public_meta(public, meta):
if not public:
return
- meta['X-Object-Public'] = public
+ meta['X-Object-Public'] = '/public/' + encode_url(public)
def validate_modification_preconditions(request, meta):
"""Check that the modified timestamp conforms with the preconditions set."""
def validate_matching_preconditions(request, meta):
"""Check that the ETag conforms with the preconditions set."""
- hash = meta.get('hash', None)
+ etag = meta.get('ETag', None)
if_match = request.META.get('HTTP_IF_MATCH')
if if_match is not None:
- if hash is None:
+ if etag is None:
raise PreconditionFailed('Resource does not exist')
- if if_match != '*' and hash not in [x.lower() for x in parse_etags(if_match)]:
+ if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
raise PreconditionFailed('Resource ETag does not match')
if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
if if_none_match is not None:
# TODO: If this passes, must ignore If-Modified-Since header.
- if hash is not None:
- if if_none_match == '*' or hash in [x.lower() for x in parse_etags(if_none_match)]:
+ if etag is not None:
+ if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
# TODO: Continue if an If-Modified-Since header is present.
if request.method in ('HEAD', 'GET'):
raise NotModified('Resource ETag matches')
raise ValueError
s = s[1:]
pos = s.find('/')
- if pos == -1:
+ if pos == -1 or pos == len(s) - 1:
raise ValueError
return s[:pos], s[(pos + 1):]
-def copy_or_move_object(request, v_account, src_container, src_name, dest_container, dest_name, move=False):
+def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
"""Copy or move an object."""
+ if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
+ del(request.META['CONTENT_TYPE'])
meta, permissions, public = get_object_headers(request)
- src_version = request.META.get('HTTP_X_SOURCE_VERSION')
+ src_version = request.META.get('HTTP_X_SOURCE_VERSION')
try:
if move:
- backend.move_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions)
+ version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
+ dest_account, dest_container, dest_name,
+ 'pithos', meta, False, permissions)
else:
- backend.copy_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions, src_version)
+ version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
+ dest_account, dest_container, dest_name,
+ 'pithos', meta, False, permissions, src_version)
except NotAllowedError:
- raise Unauthorized('Access denied')
+ raise Forbidden('Not allowed')
except (NameError, IndexError):
raise ItemNotFound('Container or object does not exist')
except ValueError:
raise BadRequest('Invalid sharing header')
except AttributeError, e:
- raise Conflict(json.dumps(e.data))
+ raise Conflict(simple_list_response(request, e.data))
+ except QuotaError:
+ raise RequestEntityTooLarge('Quota exceeded')
if public is not None:
try:
- backend.update_object_public(request.user, v_account, dest_container, dest_name, public)
+ request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
except NotAllowedError:
- raise Unauthorized('Access denied')
+ raise Forbidden('Not allowed')
except NameError:
raise ItemNotFound('Object does not exist')
+ return version_id
def get_int_parameter(p):
if p is not None:
raise BadRequest('Bad X-Object-Sharing header value')
else:
raise BadRequest('Bad X-Object-Sharing header value')
+
+ # Keep duplicates only in write list.
+ dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
+ if dups:
+ for x in dups:
+ ret['read'].remove(x)
+ if len(ret['read']) == 0:
+ del(ret['read'])
+
return ret
def get_public(request):
return request._req
if 'wsgi.input' in request.environ:
return request.environ['wsgi.input']
- raise ServiceUnavailable('Unknown server software')
+ raise NotImplemented('Unknown server software')
-MAX_UPLOAD_SIZE = 10 * (1024 * 1024) # 10MB
+MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB
def socket_read_iterator(request, length=0, blocksize=4096):
"""Return a maximum of blocksize data read from the socket in each iteration.
sock = raw_input_socket(request)
if length < 0: # Chunked transfers
# Small version (server does the dechunking).
- if request.environ.get('mod_wsgi.input_chunked', None):
+ if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
while length < MAX_UPLOAD_SIZE:
data = sock.read(blocksize)
if data == '':
raise BadRequest('Maximum size is reached')
while length > 0:
data = sock.read(min(length, blocksize))
+ if not data:
+ raise BadRequest()
length -= len(data)
yield data
+class SaveToBackendHandler(FileUploadHandler):
+ """Handle a file from an HTML form the django way.
+
+ Streams the upload into backend blocks as it arrives: data is buffered in
+ self.data, and every full block is written via backend.put_block while an
+ MD5 of the whole file is accumulated. The resulting UploadedFile carries
+ two extra attributes: .hashmap (list of block hashes) and .etag (MD5 hex).
+ """
+
+ def __init__(self, request=None):
+ super(SaveToBackendHandler, self).__init__(request)
+ self.backend = request.backend
+
+ def put_data(self, length):
+ # Flush at most one block of exactly `length` bytes from the buffer,
+ # if that much is available; the remainder stays buffered.
+ # NOTE(review): flushes a single block per call — assumes each incoming
+ # chunk is no larger than the backend block size; confirm chunk_size.
+ if len(self.data) >= length:
+ block = self.data[:length]
+ self.file.hashmap.append(self.backend.put_block(block))
+ self.md5.update(block)
+ self.data = self.data[length:]
+
+ def new_file(self, field_name, file_name, content_type, content_length, charset=None):
+ # Reset per-file state: fresh MD5, empty buffer, empty hashmap.
+ self.md5 = hashlib.md5()
+ self.data = ''
+ self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
+ self.file.size = 0
+ self.file.hashmap = []
+
+ def receive_data_chunk(self, raw_data, start):
+ self.data += raw_data
+ self.file.size += len(raw_data)
+ self.put_data(self.request.backend.block_size)
+ # Returning None stops propagation to later upload handlers.
+ return None
+
+ def file_complete(self, file_size):
+ # Flush whatever partial block remains, then finalize the ETag.
+ l = len(self.data)
+ if l > 0:
+ self.put_data(l)
+ self.file.etag = self.md5.hexdigest().lower()
+ return self.file
+
class ObjectWrapper(object):
"""Return the object's data block-per-block in each iteration.
Read from the object using the offset and length provided in each entry of the range list.
"""
- def __init__(self, ranges, sizes, hashmaps, boundary):
+ def __init__(self, backend, ranges, sizes, hashmaps, boundary):
+ self.backend = backend
self.ranges = ranges
self.sizes = sizes
self.hashmaps = hashmaps
file_size = self.sizes[self.file_index]
# Get the block for the current position.
- self.block_index = int(self.offset / backend.block_size)
+ self.block_index = int(self.offset / self.backend.block_size)
if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
self.block_hash = self.hashmaps[self.file_index][self.block_index]
try:
- self.block = backend.get_block(self.block_hash)
+ self.block = self.backend.get_block(self.block_hash)
except NameError:
raise ItemNotFound('Block does not exist')
# Get the data from the block.
- bo = self.offset % backend.block_size
+ bo = self.offset % self.backend.block_size
bl = min(self.length, len(self.block) - bo)
data = self.block[bo:bo + bl]
self.offset += bl
ranges = [(0, size)]
ret = 200
except ValueError:
- if if_range != meta['hash']:
+ if if_range != meta['ETag']:
ranges = [(0, size)]
ret = 200
boundary = uuid.uuid4().hex
else:
boundary = ''
- wrapper = ObjectWrapper(ranges, sizes, hashmaps, boundary)
+ wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
response = HttpResponse(wrapper, status=ret)
put_object_headers(response, meta, public)
if ret == 206:
response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
return response
-def put_object_block(hashmap, data, offset):
+def put_object_block(request, hashmap, data, offset):
"""Put one block of data at the given offset."""
- bi = int(offset / backend.block_size)
- bo = offset % backend.block_size
- bl = min(len(data), backend.block_size - bo)
+ bi = int(offset / request.backend.block_size)
+ bo = offset % request.backend.block_size
+ bl = min(len(data), request.backend.block_size - bo)
if bi < len(hashmap):
- hashmap[bi] = backend.update_block(hashmap[bi], data[:bl], bo)
+ hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
else:
- hashmap.append(backend.put_block(('\x00' * bo) + data[:bl]))
+ hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
return bl # Return ammount of data written.
-def hashmap_hash(hashmap):
- """Produce the root hash, treating the hashmap as a Merkle-like tree."""
-
- def subhash(d):
- h = hashlib.new(backend.hash_algorithm)
- h.update(d)
- return h.digest()
-
- if len(hashmap) == 0:
- return hexlify(subhash(''))
- if len(hashmap) == 1:
- return hashmap[0]
- s = 2
- while s < len(hashmap):
- s = s * 2
- h = hashmap + ([('\x00' * len(hashmap[0]))] * (s - len(hashmap)))
- h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
- while len(h) > 1:
- h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
- return hexlify(h[0])
+def hashmap_md5(request, hashmap, size):
+ """Produce the MD5 sum from the data in the hashmap.
+
+ Reads every block from the backend and feeds it to MD5, zero-padding each
+ block up to the expected block length, so stored blocks shorter than the
+ block size hash as if zero-filled.
+ """
+
+ # TODO: Search backend for the MD5 of another object with the same hashmap and size...
+ md5 = hashlib.md5()
+ bs = request.backend.block_size
+ for bi, hash in enumerate(hashmap):
+ data = request.backend.get_block(hash)
+ if bi == len(hashmap) - 1:
+ # Last block: only size % block_size bytes are meaningful.
+ # NOTE(review): when size is an exact multiple of block_size this
+ # makes bs == 0, so pad stays 0 and the full block is hashed —
+ # correct, but worth confirming intentional.
+ bs = size % bs
+ pad = bs - min(len(data), bs)
+ md5.update(data + ('\x00' * pad))
+ return md5.hexdigest().lower()
+
+def simple_list_response(request, l):
+ """Render the list of strings l per request.serialization.
+
+ Supports 'text' (newline-joined), 'xml' (items.xml template) and 'json'.
+ Implicitly returns None for any other serialization value.
+ """
+ if request.serialization == 'text':
+ return '\n'.join(l) + '\n'
+ if request.serialization == 'xml':
+ return render_to_string('items.xml', {'items': l})
+ if request.serialization == 'json':
+ return json.dumps(l)
+
+def get_backend():
+ """Connect a new backend instance from Django settings.
+
+ Called per request (see api_method); the caller is responsible for
+ closing it (backend.close() in the decorator's finally clause).
+ """
+ backend = connect_backend(db_module=settings.BACKEND_DB_MODULE,
+ db_connection=settings.BACKEND_DB_CONNECTION,
+ block_module=settings.BACKEND_BLOCK_MODULE,
+ block_path=settings.BACKEND_BLOCK_PATH)
+ # Seed per-account policy defaults from settings.
+ backend.default_policy['quota'] = settings.BACKEND_QUOTA
+ backend.default_policy['versioning'] = settings.BACKEND_VERSIONING
+ return backend
+
+def update_request_headers(request):
+ """Validate and normalize incoming HTTP_* headers.
+
+ Enforces limits (90 headers, 128-byte names, 256-byte values, ASCII only)
+ and percent-decodes URL-encoded header names/values in place.
+ """
+ # Handle URL-encoded keys and values.
+ meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
+ if len(meta) > 90:
+ raise BadRequest('Too many headers.')
+ for k, v in meta.iteritems():
+ if len(k) > 128:
+ raise BadRequest('Header name too large.')
+ if len(v) > 256:
+ raise BadRequest('Header value too large.')
+ try:
+ k.decode('ascii')
+ v.decode('ascii')
+ except UnicodeDecodeError:
+ raise BadRequest('Bad character in headers.')
+ if '%' in k or '%' in v:
+ # Replace the encoded entry with its decoded form in request.META
+ # (safe: we iterate over the separate `meta` copy).
+ del(request.META[k])
+ request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)
def update_response_headers(request, response):
if request.serialization == 'xml':
elif not response['Content-Type']:
response['Content-Type'] = 'text/plain; charset=UTF-8'
- if not response.has_header('Content-Length') and not (response.has_header('Content-Type') and response['Content-Type'].startswith('multipart/byteranges')):
+ if (not response.has_header('Content-Length') and
+ not (response.has_header('Content-Type') and
+ response['Content-Type'].startswith('multipart/byteranges'))):
response['Content-Length'] = len(response.content)
+ # URL-encode unicode in headers.
+ meta = response.items()
+ for k, v in meta:
+ if (k.startswith('X-Account-') or k.startswith('X-Container-') or
+ k.startswith('X-Object-') or k.startswith('Content-')):
+ del(response[k])
+ response[quote(k)] = quote(v, safe='/=,:@; ')
+
if settings.TEST:
response['Date'] = format_date_time(time())
def render_fault(request, fault):
- if settings.DEBUG or settings.TEST:
+ if isinstance(fault, InternalServerError) and (settings.DEBUG or settings.TEST):
fault.details = format_exc(fault)
request.serialization = 'text'
- data = '\n'.join((fault.message, fault.details)) + '\n'
+ data = fault.message + '\n'
+ if fault.details:
+ data += '\n' + fault.details
response = HttpResponse(data, status=fault.code)
update_response_headers(request, response)
return response
elif format == 'xml':
return 'xml'
-# for item in request.META.get('HTTP_ACCEPT', '').split(','):
-# accept, sep, rest = item.strip().partition(';')
-# if accept == 'application/json':
-# return 'json'
-# elif accept == 'application/xml' or accept == 'text/xml':
-# return 'xml'
+ for item in request.META.get('HTTP_ACCEPT', '').split(','):
+ accept, sep, rest = item.strip().partition(';')
+ if accept == 'application/json':
+ return 'json'
+ elif accept == 'application/xml' or accept == 'text/xml':
+ return 'xml'
return 'text'
-def api_method(http_method=None, format_allowed=False):
+def api_method(http_method=None, format_allowed=False, user_required=True):
"""Decorator function for views that implement an API method."""
def decorator(func):
try:
if http_method and request.method != http_method:
raise BadRequest('Method not allowed.')
+ if user_required and getattr(request, 'user', None) is None:
+ raise Unauthorized('Access denied')
# The args variable may contain up to (account, container, object).
if len(args) > 1 and len(args[1]) > 256:
if len(args) > 2 and len(args[2]) > 1024:
raise BadRequest('Object name too large.')
+ # Format and check headers.
+ update_request_headers(request)
+
# Fill in custom request variables.
request.serialization = request_serialization(request, format_allowed)
+ request.backend = get_backend()
response = func(request, *args, **kwargs)
update_response_headers(request, response)
return render_fault(request, fault)
except BaseException, e:
logger.exception('Unexpected error: %s' % e)
- fault = ServiceUnavailable('Unexpected error')
+ fault = InternalServerError('Unexpected error')
return render_fault(request, fault)
+ finally:
+ if getattr(request, 'backend', None) is not None:
+ request.backend.close()
return wrapper
return decorator