X-Git-Url: https://code.grnet.gr/git/pithos/blobdiff_plain/ab2e317e7fd06d419c93e11e03f6ac35f70f1aad..f9f15f9298274c5af6c94599bd46e663c73414b3:/pithos/api/util.py

diff --git a/pithos/api/util.py b/pithos/api/util.py
index c8d3e1c..cc486bd 100644
--- a/pithos/api/util.py
+++ b/pithos/api/util.py
@@ -35,12 +35,13 @@ from functools import wraps
 from time import time
 from traceback import format_exc
 from wsgiref.handlers import format_date_time
-from binascii import hexlify
+from binascii import hexlify, unhexlify

 from django.conf import settings
 from django.http import HttpResponse
 from django.utils import simplejson as json
 from django.utils.http import http_date, parse_etags
+from django.utils.encoding import smart_str

 from pithos.api.compat import parse_http_date_safe, parse_http_date
 from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, ItemNotFound,
@@ -59,21 +60,24 @@ import uuid

 logger = logging.getLogger(__name__)

+def rename_meta_key(d, old, new):
+    if old not in d:
+        return
+    d[new] = d[old]
+    del(d[old])
+
 def printable_header_dict(d):
     """Format a meta dictionary for printing out json/xml.

-    Convert all keys to lower case and replace dashes to underscores.
-    Change 'modified' key from backend to 'last_modified' and format date.
+    Convert all keys to lower case and replace dashes with underscores.
+    Format 'last_modified' timestamp.
     """
-    if 'modified' in d:
-        d['last_modified'] = datetime.datetime.fromtimestamp(int(d['modified'])).isoformat()
-        del(d['modified'])
+    d['last_modified'] = datetime.datetime.fromtimestamp(int(d['last_modified'])).isoformat()
     return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])

 def format_header_key(k):
     """Convert underscores to dashes and capitalize intra-dash strings."""
-
     return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])

 def get_header_prefix(request, prefix):
@@ -96,35 +100,42 @@ def get_account_headers(request):
     return meta, groups

 def put_account_headers(response, meta, groups):
-    response['X-Account-Container-Count'] = meta['count']
-    response['X-Account-Bytes-Used'] = meta['bytes']
-    if 'modified' in meta:
-        response['Last-Modified'] = http_date(int(meta['modified']))
+    if 'count' in meta:
+        response['X-Account-Container-Count'] = meta['count']
+    if 'bytes' in meta:
+        response['X-Account-Bytes-Used'] = meta['bytes']
+    response['Last-Modified'] = http_date(int(meta['modified']))
     for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
-        response[k.encode('utf-8')] = meta[k].encode('utf-8')
+        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
     if 'until_timestamp' in meta:
         response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
     for k, v in groups.iteritems():
-        response[format_header_key('X-Account-Group-' + k).encode('utf-8')] = (','.join(v)).encode('utf-8')
-
+        k = smart_str(k, strings_only=True)
+        k = format_header_key('X-Account-Group-' + k)
+        v = smart_str(','.join(v), strings_only=True)
+        response[k] = v
+
 def get_container_headers(request):
     meta = get_header_prefix(request, 'X-Container-Meta-')
     policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
     return meta, policy

 def put_container_headers(response, meta, policy):
-    response['X-Container-Object-Count'] = meta['count']
-    response['X-Container-Bytes-Used'] = meta['bytes']
+    if 'count' in meta:
+        response['X-Container-Object-Count'] = meta['count']
+    if 'bytes' in meta:
+        response['X-Container-Bytes-Used'] = meta['bytes']
     response['Last-Modified'] = http_date(int(meta['modified']))
     for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
-        response[k.encode('utf-8')] = meta[k].encode('utf-8')
-    response['X-Container-Object-Meta'] = [x[14:] for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
+        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
+    l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
+    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
     response['X-Container-Block-Size'] = backend.block_size
     response['X-Container-Block-Hash'] = backend.hash_algorithm
     if 'until_timestamp' in meta:
         response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
     for k, v in policy.iteritems():
-        response[format_header_key('X-Container-Policy-' + k).encode('utf-8')] = v.encode('utf-8')
+        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

 def get_object_headers(request):
     meta = get_header_prefix(request, 'X-Object-Meta-')
@@ -144,14 +155,16 @@ def put_object_headers(response, meta, restricted=False):
     response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
     response['Last-Modified'] = http_date(int(meta['modified']))
     if not restricted:
-        response['X-Object-Modified-By'] = meta['modified_by']
+        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
         response['X-Object-Version'] = meta['version']
         response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
         for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
-            response[k.encode('utf-8')] = meta[k].encode('utf-8')
-        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest', 'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Public'):
+            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
+        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
+                  'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
+                  'X-Object-Public'):
             if k in meta:
-                response[k] = meta[k]
+                response[k] = smart_str(meta[k], strings_only=True)
     else:
         for k in ('Content-Encoding', 'Content-Disposition'):
             if k in meta:
@@ -178,10 +191,10 @@ def update_manifest_meta(request, v_account, meta):
             md5.update(hash)
         meta['hash'] = md5.hexdigest().lower()

-def update_sharing_meta(permissions, v_account, v_container, v_object, meta):
+def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
     if permissions is None:
         return
-    perm_path, perms = permissions
+    allowed, perm_path, perms = permissions
     if len(perms) == 0:
         return
     ret = []
@@ -194,6 +207,8 @@ def update_sharing_meta(permissions, v_account, v_container, v_object, meta):
     meta['X-Object-Sharing'] = '; '.join(ret)
     if '/'.join((v_account, v_container, v_object)) != perm_path:
         meta['X-Object-Shared-By'] = perm_path
+    if request.user != v_account:
+        meta['X-Object-Allowed-To'] = allowed

 def update_public_meta(public, meta):
     if not public:
         return
@@ -221,18 +236,24 @@ def validate_modification_preconditions(request, meta):

 def validate_matching_preconditions(request, meta):
     """Check that the ETag conforms with the preconditions set."""
-    if 'hash' not in meta:
-        return # TODO: Always return?
+    hash = meta.get('hash', None)

     if_match = request.META.get('HTTP_IF_MATCH')
-    if if_match is not None and if_match != '*':
-        if meta['hash'] not in [x.lower() for x in parse_etags(if_match)]:
-            raise PreconditionFailed('Resource Etag does not match')
+    if if_match is not None:
+        if hash is None:
+            raise PreconditionFailed('Resource does not exist')
+        if if_match != '*' and hash not in [x.lower() for x in parse_etags(if_match)]:
+            raise PreconditionFailed('Resource ETag does not match')

     if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
     if if_none_match is not None:
-        if if_none_match == '*' or meta['hash'] in [x.lower() for x in parse_etags(if_none_match)]:
-            raise NotModified('Resource Etag matches')
+        # TODO: If this passes, must ignore If-Modified-Since header.
+        if hash is not None:
+            if if_none_match == '*' or hash in [x.lower() for x in parse_etags(if_none_match)]:
+                # TODO: Continue if an If-Modified-Since header is present.
+                if request.method in ('HEAD', 'GET'):
+                    raise NotModified('Resource ETag matches')
+                raise PreconditionFailed('Resource exists or ETag matches')

 def split_container_object_string(s):
     if not len(s) > 0 or s[0] != '/':
@@ -250,12 +271,12 @@ def copy_or_move_object(request, v_account, src_container, src_name, dest_contai
     src_version = request.META.get('HTTP_X_SOURCE_VERSION')
     try:
         if move:
-            backend.move_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions)
+            version_id = backend.move_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions)
         else:
-            backend.copy_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions, src_version)
+            version_id = backend.copy_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions, src_version)
     except NotAllowedError:
         raise Unauthorized('Access denied')
-    except NameError, IndexError:
+    except (NameError, IndexError):
         raise ItemNotFound('Container or object does not exist')
     except ValueError:
         raise BadRequest('Invalid sharing header')
@@ -268,9 +289,9 @@
             raise Unauthorized('Access denied')
         except NameError:
             raise ItemNotFound('Object does not exist')
+    return version_id

-def get_int_parameter(request, name):
-    p = request.GET.get(name)
+def get_int_parameter(p):
     if p is not None:
         try:
             p = int(p)
@@ -281,15 +302,9 @@
     return p

 def get_content_length(request):
-    content_length = request.META.get('CONTENT_LENGTH')
-    if not content_length:
-        raise LengthRequired('Missing Content-Length header')
-    try:
-        content_length = int(content_length)
-        if content_length < 0:
-            raise ValueError
-    except ValueError:
-        raise BadRequest('Invalid Content-Length header')
+    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
+    if content_length is None:
+        raise LengthRequired('Missing or invalid Content-Length header')
     return content_length

 def get_range(request, size):
@@ -379,13 +394,16 @@ def get_sharing(request):
     if permissions is None:
         return None
+    # TODO: Document or remove '~' replacing.
+    permissions = permissions.replace('~', '')
+
     ret = {}
     permissions = permissions.replace(' ', '')
     if permissions == '':
         return ret
     for perm in (x for x in permissions.split(';')):
         if perm.startswith('read='):
-            ret['read'] = [v.replace(' ','').lower() for v in perm[5:].split(',')]
+            ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
             if '' in ret['read']:
                 ret['read'].remove('')
             if '*' in ret['read']:
@@ -393,7 +411,7 @@
             if len(ret['read']) == 0:
                 raise BadRequest('Bad X-Object-Sharing header value')
         elif perm.startswith('write='):
-            ret['write'] = [v.replace(' ','').lower() for v in perm[6:].split(',')]
+            ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
             if '' in ret['write']:
                 ret['write'].remove('')
             if '*' in ret['write']:
@@ -402,6 +420,15 @@
                 raise BadRequest('Bad X-Object-Sharing header value')
         else:
             raise BadRequest('Bad X-Object-Sharing header value')
+
+    # Keep duplicates only in write list.
+    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
+    if dups:
+        for x in dups:
+            ret['read'].remove(x)
+        if len(ret['read']) == 0:
+            del(ret['read'])
+
     return ret

 def get_public(request):
@@ -425,26 +452,33 @@
 def raw_input_socket(request):
     """Return the socket for reading the rest of the request."""
     server_software = request.META.get('SERVER_SOFTWARE')
-    if not server_software:
-        if 'wsgi.input' in request.environ:
-            return request.environ['wsgi.input']
-        raise ServiceUnavailable('Unknown server software')
-    if server_software.startswith('WSGIServer'):
-        return request.environ['wsgi.input']
-    elif server_software.startswith('mod_python'):
+    if server_software and server_software.startswith('mod_python'):
         return request._req
+    if 'wsgi.input' in request.environ:
+        return request.environ['wsgi.input']
     raise ServiceUnavailable('Unknown server software')

 MAX_UPLOAD_SIZE = 10 * (1024 * 1024) # 10MB

-def socket_read_iterator(sock, length=0, blocksize=4096):
+def socket_read_iterator(request, length=0, blocksize=4096):
     """Return a maximum of blocksize data read from the socket in each iteration.

     Read up to 'length'. If 'length' is negative, will attempt a chunked read.
     The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
     """

+    sock = raw_input_socket(request)
     if length < 0: # Chunked transfers
+        # Small version (server does the dechunking).
+        if request.environ.get('mod_wsgi.input_chunked', None):
+            while length < MAX_UPLOAD_SIZE:
+                data = sock.read(blocksize)
+                if data == '':
+                    return
+                yield data
+            raise BadRequest('Maximum size is reached')
+
+        # Long version (do the dechunking).
         data = ''
         while length < MAX_UPLOAD_SIZE:
             # Get chunk size.
@@ -479,11 +513,10 @@ def socket_read_iterator(sock, length=0, blocksize=4096):
                     data = data[blocksize:]
                     yield ret
             sock.read(2) # CRLF
-        # TODO: Raise something to note that maximum size is reached.
+        raise BadRequest('Maximum size is reached')
     else:
         if length > MAX_UPLOAD_SIZE:
-            # TODO: Raise something to note that maximum size is reached.
-            pass
+            raise BadRequest('Maximum size is reached')
         while length > 0:
             data = sock.read(min(length, blocksize))
             length -= len(data)
@@ -589,11 +622,10 @@ def object_data_response(request, sizes, hashmaps, meta, public=False):
         if len(check) > 0:
             raise RangeNotSatisfiable('Requested range exceeds object limits')
         ret = 206
-        if_range = request.META.get('HTTP_IF_RANGE', '')
-        if if_range and if_range.startswith('If-Range:'):
-            if_range = if_range.split('If-Range:')[1]
+        if_range = request.META.get('HTTP_IF_RANGE')
+        if if_range:
             try:
-                # modification time has passed instead
+                # Modification time has passed instead.
                 last_modified = parse_http_date(if_range)
                 if last_modified != meta['modified']:
                     ranges = [(0, size)]
@@ -643,14 +675,15 @@ def hashmap_hash(hashmap):
     if len(hashmap) == 0:
         return hexlify(subhash(''))
     if len(hashmap) == 1:
-        return hexlify(subhash(hashmap[0]))
+        return hashmap[0]
+
     s = 2
     while s < len(hashmap):
         s = s * 2
-    h = hashmap + ([('\x00' * len(hashmap[0]))] * (s - len(hashmap)))
-    h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
+    h = [unhexlify(x) for x in hashmap]
+    h += [('\x00' * len(h[0]))] * (s - len(hashmap))
     while len(h) > 1:
-        h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
+        h = [subhash(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
     return hexlify(h[0])

@@ -660,7 +693,10 @@ def update_response_headers(request, response):
         response['Content-Type'] = 'application/json; charset=UTF-8'
     elif not response['Content-Type']:
         response['Content-Type'] = 'text/plain; charset=UTF-8'
-
+
+    if not response.has_header('Content-Length') and not (response.has_header('Content-Type') and response['Content-Type'].startswith('multipart/byteranges')):
+        response['Content-Length'] = len(response.content)
+
     if settings.TEST:
         response['Date'] = format_date_time(time())
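Illustrative sketch (not part of the patch above) of what the reworked hashmap_hash does: it now takes the per-block hashes as hex strings, pads them with zero blocks up to a power of two, and folds adjacent pairs into a Merkle root. The standalone helper name merkle_root and the use of sha256 below are assumptions for the example only; in the patched util.py the logic lives in hashmap_hash and the block hash function comes from backend.hash_algorithm.

    from binascii import hexlify, unhexlify
    from hashlib import sha256

    def subhash(data):
        # Stand-in for the backend's block hash function (assumed sha256 here).
        return sha256(data).digest()

    def merkle_root(hex_hashes):
        if len(hex_hashes) == 0:
            return hexlify(subhash(b''))
        if len(hex_hashes) == 1:
            return hex_hashes[0]  # single block: the root is the block hash itself
        s = 2
        while s < len(hex_hashes):
            s = s * 2  # pad the leaf level up to a power of two
        h = [unhexlify(x) for x in hex_hashes]
        h += [b'\x00' * len(h[0])] * (s - len(hex_hashes))
        while len(h) > 1:  # hash adjacent pairs until a single root remains
            h = [subhash(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
        return hexlify(h[0])

    # Example: root over three block hashes (the fourth leaf is zero padding).
    blocks = [b'first block', b'second block', b'third block']
    print(merkle_root([hexlify(subhash(b)) for b in blocks]))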