Use 'hashmap' parameter in object GET/PUT to use hashmaps.

[pithos] / pithos / api / util.py
diff --git a/pithos/api/util.py b/pithos/api/util.py

index 32c565f..cc486bd 100644 (file)
--- a/pithos/api/util.py
+++ b/pithos/api/util.py
@@ -35,12 +35,13 @@ from functools import wraps
  from time import time
  from traceback import format_exc
  from wsgiref.handlers import format_date_time
-from binascii import hexlify
+from binascii import hexlify, unhexlify
  
  from django.conf import settings
  from django.http import HttpResponse
  from django.utils import simplejson as json
  from django.utils.http import http_date, parse_etags
+from django.utils.encoding import smart_str
  
  from pithos.api.compat import parse_http_date_safe, parse_http_date
  from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, ItemNotFound,
@@ -77,7 +78,6 @@ def printable_header_dict(d):
  
  def format_header_key(k):
      """Convert underscores to dashes and capitalize intra-dash strings."""
-    
      return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
  
  def get_header_prefix(request, prefix):
@@ -106,12 +106,15 @@ def put_account_headers(response, meta, groups):
          response['X-Account-Bytes-Used'] = meta['bytes']
      response['Last-Modified'] = http_date(int(meta['modified']))
      for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
-        response[k.encode('utf-8')] = meta[k].encode('utf-8')
+        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
      if 'until_timestamp' in meta:
          response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
      for k, v in groups.iteritems():
-        response[format_header_key('X-Account-Group-' + k).encode('utf-8')] = (','.join(v)).encode('utf-8')
-
+        k = smart_str(k, strings_only=True)
+        k = format_header_key('X-Account-Group-' + k)
+        v = smart_str(','.join(v), strings_only=True)
+        response[k] = v
+    
  def get_container_headers(request):
      meta = get_header_prefix(request, 'X-Container-Meta-')
      policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
@@ -124,14 +127,15 @@ def put_container_headers(response, meta, policy):
          response['X-Container-Bytes-Used'] = meta['bytes']
      response['Last-Modified'] = http_date(int(meta['modified']))
      for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
-        response[k.encode('utf-8')] = meta[k].encode('utf-8')
-    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in meta['object_meta'] if x.startswith('X-Object-Meta-')])
+        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
+    l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
+    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
      response['X-Container-Block-Size'] = backend.block_size
      response['X-Container-Block-Hash'] = backend.hash_algorithm
      if 'until_timestamp' in meta:
          response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
      for k, v in policy.iteritems():
-        response[format_header_key('X-Container-Policy-' + k).encode('utf-8')] = v.encode('utf-8')
+        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
  
  def get_object_headers(request):
      meta = get_header_prefix(request, 'X-Object-Meta-')
@@ -151,14 +155,16 @@ def put_object_headers(response, meta, restricted=False):
      response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
      response['Last-Modified'] = http_date(int(meta['modified']))
      if not restricted:
-        response['X-Object-Modified-By'] = meta['modified_by']
+        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
          response['X-Object-Version'] = meta['version']
          response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
          for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
-            response[k.encode('utf-8')] = meta[k].encode('utf-8')
-        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest', 'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Public'):
+            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
+        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
+                  'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
+                  'X-Object-Public'):
              if k in meta:
-                response[k] = meta[k]
+                response[k] = smart_str(meta[k], strings_only=True)
      else:
          for k in ('Content-Encoding', 'Content-Disposition'):
              if k in meta:
@@ -185,10 +191,10 @@ def update_manifest_meta(request, v_account, meta):
          md5.update(hash)
          meta['hash'] = md5.hexdigest().lower()
  
-def update_sharing_meta(permissions, v_account, v_container, v_object, meta):
+def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
      if permissions is None:
          return
-    perm_path, perms = permissions
+    allowed, perm_path, perms = permissions
      if len(perms) == 0:
          return
      ret = []
@@ -201,6 +207,8 @@ def update_sharing_meta(permissions, v_account, v_container, v_object, meta):
      meta['X-Object-Sharing'] = '; '.join(ret)
      if '/'.join((v_account, v_container, v_object)) != perm_path:
          meta['X-Object-Shared-By'] = perm_path
+    if request.user != v_account:
+        meta['X-Object-Allowed-To'] = allowed
  
  def update_public_meta(public, meta):
      if not public:
@@ -228,18 +236,24 @@ def validate_modification_preconditions(request, meta):
  def validate_matching_preconditions(request, meta):
      """Check that the ETag conforms with the preconditions set."""
      
-    if 'hash' not in meta:
-        return # TODO: Always return?
+    hash = meta.get('hash', None)
      
      if_match = request.META.get('HTTP_IF_MATCH')
-    if if_match is not None and if_match != '*':
-        if meta['hash'] not in [x.lower() for x in parse_etags(if_match)]:
-            raise PreconditionFailed('Resource Etag does not match')
+    if if_match is not None:
+        if hash is None:
+            raise PreconditionFailed('Resource does not exist')
+        if if_match != '*' and hash not in [x.lower() for x in parse_etags(if_match)]:
+            raise PreconditionFailed('Resource ETag does not match')
      
      if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
      if if_none_match is not None:
-        if if_none_match == '*' or meta['hash'] in [x.lower() for x in parse_etags(if_none_match)]:
-            raise NotModified('Resource Etag matches')
+        # TODO: If this passes, must ignore If-Modified-Since header.
+        if hash is not None:
+            if if_none_match == '*' or hash in [x.lower() for x in parse_etags(if_none_match)]:
+                # TODO: Continue if an If-Modified-Since header is present.
+                if request.method in ('HEAD', 'GET'):
+                    raise NotModified('Resource ETag matches')
+                raise PreconditionFailed('Resource exists or ETag matches')
  
  def split_container_object_string(s):
      if not len(s) > 0 or s[0] != '/':
@@ -257,12 +271,12 @@ def copy_or_move_object(request, v_account, src_container, src_name, dest_contai
      src_version = request.META.get('HTTP_X_SOURCE_VERSION')    
      try:
          if move:
-            backend.move_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions)
+            version_id = backend.move_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions)
          else:
-            backend.copy_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions, src_version)
+            version_id = backend.copy_object(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, False, permissions, src_version)
      except NotAllowedError:
          raise Unauthorized('Access denied')
-    except NameError, IndexError:
+    except (NameError, IndexError):
          raise ItemNotFound('Container or object does not exist')
      except ValueError:
          raise BadRequest('Invalid sharing header')
@@ -275,6 +289,7 @@ def copy_or_move_object(request, v_account, src_container, src_name, dest_contai
              raise Unauthorized('Access denied')
          except NameError:
              raise ItemNotFound('Object does not exist')
+    return version_id
  
  def get_int_parameter(p):
      if p is not None:
@@ -379,6 +394,9 @@ def get_sharing(request):
      if permissions is None:
          return None
      
+    # TODO: Document or remove '~' replacing.
+    permissions = permissions.replace('~', '')
+    
      ret = {}
      permissions = permissions.replace(' ', '')
      if permissions == '':
@@ -402,6 +420,15 @@ def get_sharing(request):
                  raise BadRequest('Bad X-Object-Sharing header value')
          else:
              raise BadRequest('Bad X-Object-Sharing header value')
+    
+    # Keep duplicates only in write list.
+    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
+    if dups:
+        for x in dups:
+            ret['read'].remove(x)
+        if len(ret['read']) == 0:
+            del(ret['read'])
+    
      return ret
  
  def get_public(request):
@@ -433,14 +460,25 @@ def raw_input_socket(request):
  
  MAX_UPLOAD_SIZE = 10 * (1024 * 1024) # 10MB
  
-def socket_read_iterator(sock, length=0, blocksize=4096):
+def socket_read_iterator(request, length=0, blocksize=4096):
      """Return a maximum of blocksize data read from the socket in each iteration.
      
      Read up to 'length'. If 'length' is negative, will attempt a chunked read.
      The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.
      """
      
+    sock = raw_input_socket(request)
      if length < 0: # Chunked transfers
+        # Short version (the server, mod_wsgi, does the dechunking; just read the stream).
+        if request.environ.get('mod_wsgi.input_chunked', None):
+            while length < MAX_UPLOAD_SIZE:
+                data = sock.read(blocksize)
+                if data == '':
+                    return
+                yield data
+            raise BadRequest('Maximum size is reached')
+        
+        # Long version (parse the chunked transfer encoding ourselves).
          data = ''
          while length < MAX_UPLOAD_SIZE:
              # Get chunk size.
@@ -637,14 +675,15 @@ def hashmap_hash(hashmap):
      if len(hashmap) == 0:
          return hexlify(subhash(''))
      if len(hashmap) == 1:
-        return hexlify(subhash(hashmap[0]))
+        return hashmap[0]
+    
      s = 2
      while s < len(hashmap):
          s = s * 2
-    h = hashmap + ([('\x00' * len(hashmap[0]))] * (s - len(hashmap)))
-    h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
+    h = [unhexlify(x) for x in hashmap]
+    h += [('\x00' * len(h[0]))] * (s - len(hashmap))
      while len(h) > 1:
-        h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
+        h = [subhash(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
      return hexlify(h[0])
  
  def update_response_headers(request, response):
@@ -686,12 +725,12 @@ def request_serialization(request, format_allowed=False):
      elif format == 'xml':
          return 'xml'
      
-#     for item in request.META.get('HTTP_ACCEPT', '').split(','):
-#         accept, sep, rest = item.strip().partition(';')
-#         if accept == 'application/json':
-#             return 'json'
-#         elif accept == 'application/xml' or accept == 'text/xml':
-#             return 'xml'
+    for item in request.META.get('HTTP_ACCEPT', '').split(','):
+        accept, sep, rest = item.strip().partition(';')
+        if accept == 'application/json':
+            return 'json'
+        elif accept == 'application/xml' or accept == 'text/xml':
+            return 'xml'
      
      return 'text'