X-Git-Url: https://code.grnet.gr/git/pithos/blobdiff_plain/ab2e317e7fd06d419c93e11e03f6ac35f70f1aad..f9f15f9298274c5af6c94599bd46e663c73414b3:/pithos/api/util.py

diff --git a/pithos/api/util.py b/pithos/api/util.py
index c8d3e1c..cc486bd 100644
--- a/pithos/api/util.py
+++ b/pithos/api/util.py
@@ -35,12 +35,13 @@ from functools import wraps
 from time import time
 from traceback import format_exc
 from wsgiref.handlers import format_date_time
-from binascii import hexlify
+from binascii import hexlify, unhexlify

 from django.conf import settings
 from django.http import HttpResponse
 from django.utils import simplejson as json
 from django.utils.http import http_date, parse_etags
+from django.utils.encoding import smart_str

 from pithos.api.compat import parse_http_date_safe, parse_http_date
 from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, ItemNotFound,
@@ -59,21 +60,24 @@ import uuid

 logger = logging.getLogger(__name__)

+def rename_meta_key(d, old, new):
+    if old not in d:
+        return
+    d[new] = d[old]
+    del(d[old])
+
 def printable_header_dict(d):
     """Format a meta dictionary for printing out json/xml.

-    Convert all keys to lower case and replace dashes to underscores.
-    Change 'modified' key from backend to 'last_modified' and format date.
+    Convert all keys to lower case and replace dashes with underscores.
+    Format 'last_modified' timestamp.
     """
-    if 'modified' in d:
-        d['last_modified'] = datetime.datetime.fromtimestamp(int(d['modified'])).isoformat()
-        del(d['modified'])
+    d['last_modified'] = datetime.datetime.fromtimestamp(int(d['last_modified'])).isoformat()
     return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])

 def format_header_key(k):
     """Convert underscores to dashes and capitalize intra-dash strings."""
-
     return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])

 def get_header_prefix(request, prefix):
@@ -96,35 +100,42 @@ def get_account_headers(request):
     return meta, groups

 def put_account_headers(response, meta, groups):
-    response['X-Account-Container-Count'] = meta['count']
-    response['X-Account-Bytes-Used'] = meta['bytes']
-    if 'modified' in meta:
-        response['Last-Modified'] = http_date(int(meta['modified']))
+    if 'count' in meta:
+        response['X-Account-Container-Count'] = meta['count']
+    if 'bytes' in meta:
+        response['X-Account-Bytes-Used'] = meta['bytes']
+    response['Last-Modified'] = http_date(int(meta['modified']))
     for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
-        response[k.encode('utf-8')] = meta[k].encode('utf-8')
+        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
     if 'until_timestamp' in meta:
         response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
     for k, v in groups.iteritems():
-        response[format_header_key('X-Account-Group-' + k).encode('utf-8')] = (','.join(v)).encode('utf-8')
-
+        k = smart_str(k, strings_only=True)
+        k = format_header_key('X-Account-Group-' + k)
+        v = smart_str(','.join(v), strings_only=True)
+        response[k] = v
+
 def get_container_headers(request):
     meta = get_header_prefix(request, 'X-Container-Meta-')
     policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
     return meta, policy

 def put_container_headers(response, meta, policy):
-    response['X-Container-Object-Count'] = meta['count']
-    response['X-Container-Bytes-Used'] = meta['bytes']
+    if 'count' in meta:
+        response['X-Container-Object-Count'] = meta['count']
+    if 'bytes' in meta:
+        response['X-Container-Bytes-Used'] = meta['bytes']
     response['Last-Modified'] = http_date(int(meta['modified']))
     for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
-        response[k.encode('utf-8')] = meta[k].encode('utf-8')
-    response['X-Container-Object-Meta'] = [x[14:] for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
+        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
+    l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
+    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
     response['X-Container-Block-Size'] = backend.block_size
     response['X-Container-Block-Hash'] = backend.hash_algorithm
     if 'until_timestamp' in meta:
         response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
     for k, v in policy.iteritems():
-        response[format_header_key('X-Container-Policy-' + k).encode('utf-8')] = v.encode('utf-8')
+        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

 def get_object_headers(request):
     meta = get_header_prefix(request, 'X-Object-Meta-')
@@ -144,14 +155,16 @@ def put_object_headers(response, meta, restricted=False):
     response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
     response['Last-Modified'] = http_date(int(meta['modified']))
     if not restricted:
-        response['X-Object-Modified-By'] = meta['modified_by']
+        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
         response['X-Object-Version'] = meta['version']
         response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
         for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
-            response[k.encode('utf-8')] = meta[k].encode('utf-8')
-        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest', 'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Public'):
+            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
+        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
+                  'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
+                  'X-Object-Public'):
             if k in meta:
-                response[k] = meta[k]
+                response[k] = smart_str(meta[k], strings_only=True)
     else:
         for k in ('Content-Encoding', 'Content-Disposition'):
             if k in meta:
@@ -178,10 +191,10 @@ def update_manifest_meta(request, v_account, meta):
             md5.update(hash)
         meta['hash'] = md5.hexdigest().lower()

-def update_sharing_meta(permissions, v_account, v_container, v_object, meta):
+def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
     if permissions is None:
         return
-    perm_path, perms = permissions
+    allowed, perm_path, perms = permissions
     if len(perms) == 0:
         return
     ret = []
@@ -194,6 +207,8 @@ def update_sharing_meta(permissions, v_account, v_container, v_object, meta):
     meta['X-Object-Sharing'] = '; '.join(ret)
     if '/'.join((v_account, v_container, v_object)) != perm_path:
         meta['X-Object-Shared-By'] = perm_path
+    if request.user != v_account:
+        meta['X-Object-Allowed-To'] = allowed

 def update_public_meta(public, meta):
     if not public:
         return
@@ -221,18 +236,24 @@ def validate_modification_preconditions(request, meta):

 def validate_matching_preconditions(request, meta):
     """Check that the ETag conforms with the preconditions set."""
-    if 'hash' not in meta:
-        return # TODO: Always return?
+    hash = meta.get('hash', None)

     if_match = request.META.get('HTTP_IF_MATCH')
-    if if_match is not None and if_match != '*':
-        if meta['hash'] not in [x.lower() for x in parse_etags(if_match)]:
-            raise PreconditionFailed('Resource Etag does not match')
+    if if_match is not None:
+        if hash is None:
+            raise PreconditionFailed('Resource does not exist')
+        if if_match != '*' and hash not in [x.lower() for x in parse_etags(if_match)]:
+            raise PreconditionFailed('Resource ETag does not match')

     if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
     if if_none_match is not None:
-        if if_none_match == '*' or meta['hash'] in [x.lower() for x in parse_etags(if_none_match)]:
-            raise NotModified('Resource Etag matches')
+        # TODO: If this passes, must ignore If-Modified-Since header.
+        if hash is not None:
+            if if_none_match == '*' or hash in [x.lower() for x in parse_etags(if_none_match)]:
+                # TODO: Continue if an If-Modified-Since header is present.
+                if request.method in ('HEAD', 'GET'):
+                    raise NotModified('Resource ETag matches')
+                raise PreconditionFailed('Resource exists or ETag matches')

 def split_container_object_string(s):
     if not len(s) > 0 or s[0] != '/':
@@ -250,12 +271,12 @@ def copy_or_move_object(request, v_account, src_container, src_name, dest_contai
     src_version = request.META.get('HTTP_X_SOURCE_VERSION')
     try:
         if move:
-            backend.move_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions)
+            version_id = backend.move_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions)
         else:
-            backend.copy_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions, src_version)
+            version_id = backend.copy_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions, src_version)
     except NotAllowedError:
         raise Unauthorized('Access denied')
-    except NameError, IndexError:
+    except (NameError, IndexError):
         raise ItemNotFound('Container or object does not exist')
     except ValueError:
         raise BadRequest('Invalid sharing header')
@@ -268,9 +289,9 @@
             raise Unauthorized('Access denied')
         except NameError:
             raise ItemNotFound('Object does not exist')
+    return version_id

-def get_int_parameter(request, name):
-    p = request.GET.get(name)
+def get_int_parameter(p):
     if p is not None:
         try:
             p = int(p)
@@ -281,15 +302,9 @@
     return p

 def get_content_length(request):
-    content_length = request.META.get('CONTENT_LENGTH')
-    if not content_length:
-        raise LengthRequired('Missing Content-Length header')
-    try:
-        content_length = int(content_length)
-        if content_length < 0:
-            raise ValueError
-    except ValueError:
-        raise BadRequest('Invalid Content-Length header')
+    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
+    if content_length is None:
+        raise LengthRequired('Missing or invalid Content-Length header')
     return content_length

 def get_range(request, size):
@@ -379,13 +394,16 @@ def get_sharing(request):
     if permissions is None:
         return None
+    # TODO: Document or remove '~' replacing.
+    permissions = permissions.replace('~', '')
+
     ret = {}
     permissions = permissions.replace(' ', '')
     if permissions == '':
         return ret
     for perm in (x for x in permissions.split(';')):
         if perm.startswith('read='):
-            ret['read'] = [v.replace(' ','').lower() for v in perm[5:].split(',')]
+            ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
             if '' in ret['read']:
                 ret['read'].remove('')
             if '*' in ret['read']:
@@ -393,7 +411,7 @@
             if len(ret['read']) == 0:
                 raise BadRequest('Bad X-Object-Sharing header value')
         elif perm.startswith('write='):
-            ret['write'] = [v.replace(' ','').lower() for v in perm[6:].split(',')]
+            ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
             if '' in ret['write']:
                 ret['write'].remove('')
             if '*' in ret['write']:
@@ -402,6 +420,15 @@
                 raise BadRequest('Bad X-Object-Sharing header value')
         else:
             raise BadRequest('Bad X-Object-Sharing header value')
+
+    # Keep duplicates only in write list.
+    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
+    if dups:
+        for x in dups:
+            ret['read'].remove(x)
+        if len(ret['read']) == 0:
+            del(ret['read'])
+
     return ret

 def get_public(request):
@@ -425,26 +452,33 @@
 def raw_input_socket(request):
     """Return the socket for reading the rest of the request."""
     server_software = request.META.get('SERVER_SOFTWARE')
-    if not server_software:
-        if 'wsgi.input' in request.environ:
-            return request.environ['wsgi.input']
-        raise ServiceUnavailable('Unknown server software')
-    if server_software.startswith('WSGIServer'):
-        return request.environ['wsgi.input']
-    elif server_software.startswith('mod_python'):
+    if server_software and server_software.startswith('mod_python'):
         return request._req
+    if 'wsgi.input' in request.environ:
+        return request.environ['wsgi.input']
     raise ServiceUnavailable('Unknown server software')

 MAX_UPLOAD_SIZE = 10 * (1024 * 1024) # 10MB

-def socket_read_iterator(sock, length=0, blocksize=4096):
+def socket_read_iterator(request, length=0, blocksize=4096):
     """Return a maximum of blocksize data read from the socket in each iteration.

     Read up to 'length'. If 'length' is negative, will attempt a chunked read.
     The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
     """

+    sock = raw_input_socket(request)
     if length < 0: # Chunked transfers
+        # Small version (server does the dechunking).
+        if request.environ.get('mod_wsgi.input_chunked', None):
+            while length < MAX_UPLOAD_SIZE:
+                data = sock.read(blocksize)
+                if data == '':
+                    return
+                yield data
+            raise BadRequest('Maximum size is reached')
+
+        # Long version (do the dechunking).
         data = ''
         while length < MAX_UPLOAD_SIZE:
             # Get chunk size.
@@ -479,11 +513,10 @@ def socket_read_iterator(sock, length=0, blocksize=4096):
                     data = data[blocksize:]
                     yield ret
             sock.read(2) # CRLF
-        # TODO: Raise something to note that maximum size is reached.
+        raise BadRequest('Maximum size is reached')
     else:
         if length > MAX_UPLOAD_SIZE:
-            # TODO: Raise something to note that maximum size is reached.
-            pass
+            raise BadRequest('Maximum size is reached')
         while length > 0:
             data = sock.read(min(length, blocksize))
             length -= len(data)
@@ -589,11 +622,10 @@ def object_data_response(request, sizes, hashmaps, meta, public=False):
         if len(check) > 0:
             raise RangeNotSatisfiable('Requested range exceeds object limits')
         ret = 206
-        if_range = request.META.get('HTTP_IF_RANGE', '')
-        if if_range and if_range.startswith('If-Range:'):
-            if_range = if_range.split('If-Range:')[1]
+        if_range = request.META.get('HTTP_IF_RANGE')
+        if if_range:
             try:
-                # modification time has passed instead
+                # Modification time has passed instead.
                 last_modified = parse_http_date(if_range)
                 if last_modified != meta['modified']:
                     ranges = [(0, size)]
@@ -643,14 +675,15 @@ def hashmap_hash(hashmap):
     if len(hashmap) == 0:
         return hexlify(subhash(''))
     if len(hashmap) == 1:
-        return hexlify(subhash(hashmap[0]))
+        return hashmap[0]
+
     s = 2
     while s < len(hashmap):
         s = s * 2
-    h = hashmap + ([('\x00' * len(hashmap[0]))] * (s - len(hashmap)))
-    h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
+    h = [unhexlify(x) for x in hashmap]
+    h += [('\x00' * len(h[0]))] * (s - len(hashmap))
     while len(h) > 1:
-        h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
+        h = [subhash(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
     return hexlify(h[0])

@@ -660,7 +693,10 @@ def update_response_headers(request, response):
         response['Content-Type'] = 'application/json; charset=UTF-8'
     elif not response['Content-Type']:
         response['Content-Type'] = 'text/plain; charset=UTF-8'
-
+
+    if not response.has_header('Content-Length') and not (response.has_header('Content-Type') and response['Content-Type'].startswith('multipart/byteranges')):
+        response['Content-Length'] = len(response.content)
+
     if settings.TEST:
         response['Date'] = format_date_time(time())
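Illustrative sketch (not part of the patch above) of what the reworked hashmap_hash does: it now takes the per-block hashes as hex strings, pads them with zero blocks up to a power of two, and folds adjacent pairs into a Merkle root. The standalone helper name merkle_root and the use of sha256 below are assumptions for the example only; in the patched util.py the logic lives in hashmap_hash and the block hash function comes from backend.hash_algorithm.

    from binascii import hexlify, unhexlify
    from hashlib import sha256

    def subhash(data):
        # Stand-in for the backend's block hash function (assumed sha256 here).
        return sha256(data).digest()

    def merkle_root(hex_hashes):
        if len(hex_hashes) == 0:
            return hexlify(subhash(b''))
        if len(hex_hashes) == 1:
            return hex_hashes[0]  # single block: the root is the block hash itself
        s = 2
        while s < len(hex_hashes):
            s = s * 2  # pad the leaf level up to a power of two
        h = [unhexlify(x) for x in hex_hashes]
        h += [b'\x00' * len(h[0])] * (s - len(hex_hashes))
        while len(h) > 1:  # hash adjacent pairs until a single root remains
            h = [subhash(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
        return hexlify(h[0])

    # Example: root over three block hashes (the fourth leaf is zero padding).
    blocks = [b'first block', b'second block', b'third block']
    print(merkle_root([hexlify(subhash(b)) for b in blocks]))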