code.grnet.gr Git - pithos/blob - snf-pithos-app/pithos/api/util.py

   1 # Copyright 2011-2012 GRNET S.A. All rights reserved.
   2 #
   3 # Redistribution and use in source and binary forms, with or
   4 # without modification, are permitted provided that the following
   5 # conditions are met:
   6 #
   7 #   1. Redistributions of source code must retain the above
   8 #      copyright notice, this list of conditions and the following
   9 #      disclaimer.
  10 #
  11 #   2. Redistributions in binary form must reproduce the above
  12 #      copyright notice, this list of conditions and the following
  13 #      disclaimer in the documentation and/or other materials
  14 #      provided with the distribution.
  15 #
  16 # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
  17 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  19 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
  20 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  23 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  24 # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  26 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27 # POSSIBILITY OF SUCH DAMAGE.
  28 #
  29 # The views and conclusions contained in the software and
  30 # documentation are those of the authors and should not be
  31 # interpreted as representing official policies, either expressed
  32 # or implied, of GRNET S.A.
  33
  34 from functools import wraps
  35 from time import time
  36 from traceback import format_exc
  37 from wsgiref.handlers import format_date_time
  38 from binascii import hexlify, unhexlify
  39 from datetime import datetime, tzinfo, timedelta
  40 from urllib import quote, unquote
  41
  42 from django.conf import settings
  43 from django.http import HttpResponse
  44 from django.utils import simplejson as json
  45 from django.utils.http import http_date, parse_etags
  46 from django.utils.encoding import smart_unicode, smart_str
  47 from django.core.files.uploadhandler import FileUploadHandler
  48 from django.core.files.uploadedfile import UploadedFile
  49
  50 from pithos.lib.compat import parse_http_date_safe, parse_http_date
  51 from pithos.lib.hashmap import HashMap
  52
  53 from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
  54                                 Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
  55                                 RangeNotSatisfiable, ServiceUnavailable)
  56 from pithos.api.short_url import encode_url
  57 from pithos.backends import connect_backend
  58 from pithos.backends.base import NotAllowedError, QuotaError
  59
  60 import logging
  61 import re
  62 import hashlib
  63 import uuid
  64 import decimal
  65
  66
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
  68
  69
  70 class UTC(tzinfo):
  71    def utcoffset(self, dt):
  72        return timedelta(0)
  73
  74    def tzname(self, dt):
  75        return 'UTC'
  76
  77    def dst(self, dt):
  78        return timedelta(0)
  79
  80 def json_encode_decimal(obj):
  81     if isinstance(obj, decimal.Decimal):
  82         return str(obj)
  83     raise TypeError(repr(obj) + " is not JSON serializable")
  84
  85 def isoformat(d):
  86    """Return an ISO8601 date string that includes a timezone."""
  87
  88    return d.replace(tzinfo=UTC()).isoformat()
  89
  90 def rename_meta_key(d, old, new):
  91     if old not in d:
  92         return
  93     d[new] = d[old]
  94     del(d[old])
  95
  96 def printable_header_dict(d):
  97     """Format a meta dictionary for printing out json/xml.
  98
  99     Convert all keys to lower case and replace dashes with underscores.
 100     Format 'last_modified' timestamp.
 101     """
 102
 103     if 'last_modified' in d:
 104         d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
 105     return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
 106
 107 def format_header_key(k):
 108     """Convert underscores to dashes and capitalize intra-dash strings."""
 109     return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
 110
 111 def get_header_prefix(request, prefix):
 112     """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""
 113
 114     prefix = 'HTTP_' + prefix.upper().replace('-', '_')
 115     # TODO: Document or remove '~' replacing.
 116     return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])
 117
 118 def get_account_headers(request):
 119     meta = get_header_prefix(request, 'X-Account-Meta-')
 120     groups = {}
 121     for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
 122         n = k[16:].lower()
 123         if '-' in n or '_' in n:
 124             raise BadRequest('Bad characters in group name')
 125         groups[n] = v.replace(' ', '').split(',')
 126         while '' in groups[n]:
 127             groups[n].remove('')
 128     return meta, groups
 129
 130 def put_account_headers(response, meta, groups, policy):
 131     if 'count' in meta:
 132         response['X-Account-Container-Count'] = meta['count']
 133     if 'bytes' in meta:
 134         response['X-Account-Bytes-Used'] = meta['bytes']
 135     response['Last-Modified'] = http_date(int(meta['modified']))
 136     for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
 137         response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 138     if 'until_timestamp' in meta:
 139         response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
 140     for k, v in groups.iteritems():
 141         k = smart_str(k, strings_only=True)
 142         k = format_header_key('X-Account-Group-' + k)
 143         v = smart_str(','.join(v), strings_only=True)
 144         response[k] = v
 145     for k, v in policy.iteritems():
 146         response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
 147
 148 def get_container_headers(request):
 149     meta = get_header_prefix(request, 'X-Container-Meta-')
 150     policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
 151     return meta, policy
 152
 153 def put_container_headers(request, response, meta, policy):
 154     if 'count' in meta:
 155         response['X-Container-Object-Count'] = meta['count']
 156     if 'bytes' in meta:
 157         response['X-Container-Bytes-Used'] = meta['bytes']
 158     response['Last-Modified'] = http_date(int(meta['modified']))
 159     for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
 160         response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 161     l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
 162     response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
 163     response['X-Container-Block-Size'] = request.backend.block_size
 164     response['X-Container-Block-Hash'] = request.backend.hash_algorithm
 165     if 'until_timestamp' in meta:
 166         response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
 167     for k, v in policy.iteritems():
 168         response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
 169
 170 def get_object_headers(request):
 171     meta = get_header_prefix(request, 'X-Object-Meta-')
 172     if request.META.get('CONTENT_TYPE'):
 173         meta['Content-Type'] = request.META['CONTENT_TYPE']
 174     if request.META.get('HTTP_CONTENT_ENCODING'):
 175         meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
 176     if request.META.get('HTTP_CONTENT_DISPOSITION'):
 177         meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
 178     if request.META.get('HTTP_X_OBJECT_MANIFEST'):
 179         meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
 180     return meta, get_sharing(request), get_public(request)
 181
 182 def put_object_headers(response, meta, restricted=False):
 183     response['ETag'] = meta['ETag'] if 'ETag' in meta else meta['hash']
 184     response['Content-Length'] = meta['bytes']
 185     response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
 186     response['Last-Modified'] = http_date(int(meta['modified']))
 187     if not restricted:
 188         response['X-Object-Hash'] = meta['hash']
 189         response['X-Object-UUID'] = meta['uuid']
 190         response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
 191         response['X-Object-Version'] = meta['version']
 192         response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
 193         for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
 194             response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 195         for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
 196                   'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
 197                   'X-Object-Public'):
 198             if k in meta:
 199                 response[k] = smart_str(meta[k], strings_only=True)
 200     else:
 201         for k in ('Content-Encoding', 'Content-Disposition'):
 202             if k in meta:
 203                 response[k] = smart_str(meta[k], strings_only=True)
 204
 205 def update_manifest_meta(request, v_account, meta):
 206     """Update metadata if the object has an X-Object-Manifest."""
 207
 208     if 'X-Object-Manifest' in meta:
 209         etag = ''
 210         bytes = 0
 211         try:
 212             src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
 213             objects = request.backend.list_objects(request.user_uniq, v_account,
 214                                 src_container, prefix=src_name, virtual=False)
 215             for x in objects:
 216                 src_meta = request.backend.get_object_meta(request.user_uniq,
 217                                         v_account, src_container, x[0], 'pithos', x[1])
 218                 if 'ETag' in src_meta:
 219                     etag += src_meta['ETag']
 220                 bytes += src_meta['bytes']
 221         except:
 222             # Ignore errors.
 223             return
 224         meta['bytes'] = bytes
 225         md5 = hashlib.md5()
 226         md5.update(etag)
 227         meta['ETag'] = md5.hexdigest().lower()
 228
 229 def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
 230     if permissions is None:
 231         return
 232     allowed, perm_path, perms = permissions
 233     if len(perms) == 0:
 234         return
 235     ret = []
 236     r = ','.join(perms.get('read', []))
 237     if r:
 238         ret.append('read=' + r)
 239     w = ','.join(perms.get('write', []))
 240     if w:
 241         ret.append('write=' + w)
 242     meta['X-Object-Sharing'] = '; '.join(ret)
 243     if '/'.join((v_account, v_container, v_object)) != perm_path:
 244         meta['X-Object-Shared-By'] = perm_path
 245     if request.user_uniq != v_account:
 246         meta['X-Object-Allowed-To'] = allowed
 247
 248 def update_public_meta(public, meta):
 249     if not public:
 250         return
 251     meta['X-Object-Public'] = '/public/' + encode_url(public)
 252
 253 def validate_modification_preconditions(request, meta):
 254     """Check that the modified timestamp conforms with the preconditions set."""
 255
 256     if 'modified' not in meta:
 257         return # TODO: Always return?
 258
 259     if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
 260     if if_modified_since is not None:
 261         if_modified_since = parse_http_date_safe(if_modified_since)
 262     if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
 263         raise NotModified('Resource has not been modified')
 264
 265     if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
 266     if if_unmodified_since is not None:
 267         if_unmodified_since = parse_http_date_safe(if_unmodified_since)
 268     if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
 269         raise PreconditionFailed('Resource has been modified')
 270
 271 def validate_matching_preconditions(request, meta):
 272     """Check that the ETag conforms with the preconditions set."""
 273
 274     etag = meta.get('ETag', None)
 275
 276     if_match = request.META.get('HTTP_IF_MATCH')
 277     if if_match is not None:
 278         if etag is None:
 279             raise PreconditionFailed('Resource does not exist')
 280         if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
 281             raise PreconditionFailed('Resource ETag does not match')
 282
 283     if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
 284     if if_none_match is not None:
 285         # TODO: If this passes, must ignore If-Modified-Since header.
 286         if etag is not None:
 287             if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
 288                 # TODO: Continue if an If-Modified-Since header is present.
 289                 if request.method in ('HEAD', 'GET'):
 290                     raise NotModified('Resource ETag matches')
 291                 raise PreconditionFailed('Resource exists or ETag matches')
 292
 293 def split_container_object_string(s):
 294     if not len(s) > 0 or s[0] != '/':
 295         raise ValueError
 296     s = s[1:]
 297     pos = s.find('/')
 298     if pos == -1 or pos == len(s) - 1:
 299         raise ValueError
 300     return s[:pos], s[(pos + 1):]
 301
 302 def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
 303     """Copy or move an object."""
 304
 305     meta, permissions, public = get_object_headers(request)
 306     src_version = request.META.get('HTTP_X_SOURCE_VERSION')
 307     try:
 308         if move:
 309             version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
 310                                                         dest_account, dest_container, dest_name,
 311                                                         'pithos', meta, False, permissions)
 312         else:
 313             version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
 314                                                         dest_account, dest_container, dest_name,
 315                                                         'pithos', meta, False, permissions, src_version)
 316     except NotAllowedError:
 317         raise Forbidden('Not allowed')
 318     except (NameError, IndexError):
 319         raise ItemNotFound('Container or object does not exist')
 320     except ValueError:
 321         raise BadRequest('Invalid sharing header')
 322     except AttributeError, e:
 323         raise Conflict('\n'.join(e.data) + '\n')
 324     except QuotaError:
 325         raise RequestEntityTooLarge('Quota exceeded')
 326     if public is not None:
 327         try:
 328             request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
 329         except NotAllowedError:
 330             raise Forbidden('Not allowed')
 331         except NameError:
 332             raise ItemNotFound('Object does not exist')
 333     return version_id
 334
 335 def get_int_parameter(p):
 336     if p is not None:
 337         try:
 338             p = int(p)
 339         except ValueError:
 340             return None
 341         if p < 0:
 342             return None
 343     return p
 344
 345 def get_content_length(request):
 346     content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
 347     if content_length is None:
 348         raise LengthRequired('Missing or invalid Content-Length header')
 349     return content_length
 350
 351 def get_range(request, size):
 352     """Parse a Range header from the request.
 353
 354     Either returns None, when the header is not existent or should be ignored,
 355     or a list of (offset, length) tuples - should be further checked.
 356     """
 357
 358     ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
 359     if not ranges.startswith('bytes='):
 360         return None
 361
 362     ret = []
 363     for r in (x.strip() for x in ranges[6:].split(',')):
 364         p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
 365         m = p.match(r)
 366         if not m:
 367             return None
 368         offset = m.group('offset')
 369         upto = m.group('upto')
 370         if offset == '' and upto == '':
 371             return None
 372
 373         if offset != '':
 374             offset = int(offset)
 375             if upto != '':
 376                 upto = int(upto)
 377                 if offset > upto:
 378                     return None
 379                 ret.append((offset, upto - offset + 1))
 380             else:
 381                 ret.append((offset, size - offset))
 382         else:
 383             length = int(upto)
 384             ret.append((size - length, length))
 385
 386     return ret
 387
 388 def get_content_range(request):
 389     """Parse a Content-Range header from the request.
 390
 391     Either returns None, when the header is not existent or should be ignored,
 392     or an (offset, length, total) tuple - check as length, total may be None.
 393     Returns (None, None, None) if the provided range is '*/*'.
 394     """
 395
 396     ranges = request.META.get('HTTP_CONTENT_RANGE', '')
 397     if not ranges:
 398         return None
 399
 400     p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
 401     m = p.match(ranges)
 402     if not m:
 403         if ranges == 'bytes */*':
 404             return (None, None, None)
 405         return None
 406     offset = int(m.group('offset'))
 407     upto = m.group('upto')
 408     total = m.group('total')
 409     if upto != '':
 410         upto = int(upto)
 411     else:
 412         upto = None
 413     if total != '*':
 414         total = int(total)
 415     else:
 416         total = None
 417     if (upto is not None and offset > upto) or \
 418         (total is not None and offset >= total) or \
 419         (total is not None and upto is not None and upto >= total):
 420         return None
 421
 422     if upto is None:
 423         length = None
 424     else:
 425         length = upto - offset + 1
 426     return (offset, length, total)
 427
 428 def get_sharing(request):
 429     """Parse an X-Object-Sharing header from the request.
 430
 431     Raises BadRequest on error.
 432     """
 433
 434     permissions = request.META.get('HTTP_X_OBJECT_SHARING')
 435     if permissions is None:
 436         return None
 437
 438     # TODO: Document or remove '~' replacing.
 439     permissions = permissions.replace('~', '')
 440
 441     ret = {}
 442     permissions = permissions.replace(' ', '')
 443     if permissions == '':
 444         return ret
 445     for perm in (x for x in permissions.split(';')):
 446         if perm.startswith('read='):
 447             ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
 448             if '' in ret['read']:
 449                 ret['read'].remove('')
 450             if '*' in ret['read']:
 451                 ret['read'] = ['*']
 452             if len(ret['read']) == 0:
 453                 raise BadRequest('Bad X-Object-Sharing header value')
 454         elif perm.startswith('write='):
 455             ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
 456             if '' in ret['write']:
 457                 ret['write'].remove('')
 458             if '*' in ret['write']:
 459                 ret['write'] = ['*']
 460             if len(ret['write']) == 0:
 461                 raise BadRequest('Bad X-Object-Sharing header value')
 462         else:
 463             raise BadRequest('Bad X-Object-Sharing header value')
 464
 465     # Keep duplicates only in write list.
 466     dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
 467     if dups:
 468         for x in dups:
 469             ret['read'].remove(x)
 470         if len(ret['read']) == 0:
 471             del(ret['read'])
 472
 473     return ret
 474
 475 def get_public(request):
 476     """Parse an X-Object-Public header from the request.
 477
 478     Raises BadRequest on error.
 479     """
 480
 481     public = request.META.get('HTTP_X_OBJECT_PUBLIC')
 482     if public is None:
 483         return None
 484
 485     public = public.replace(' ', '').lower()
 486     if public == 'true':
 487         return True
 488     elif public == 'false' or public == '':
 489         return False
 490     raise BadRequest('Bad X-Object-Public header value')
 491
 492 def raw_input_socket(request):
 493     """Return the socket for reading the rest of the request."""
 494
 495     server_software = request.META.get('SERVER_SOFTWARE')
 496     if server_software and server_software.startswith('mod_python'):
 497         return request._req
 498     if 'wsgi.input' in request.environ:
 499         return request.environ['wsgi.input']
 500     raise ServiceUnavailable('Unknown server software')
 501
 502 MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB
 503
 504 def socket_read_iterator(request, length=0, blocksize=4096):
 505     """Return a maximum of blocksize data read from the socket in each iteration.
 506
 507     Read up to 'length'. If 'length' is negative, will attempt a chunked read.
 508     The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
 509     """
 510
 511     sock = raw_input_socket(request)
 512     if length < 0: # Chunked transfers
 513         # Small version (server does the dechunking).
 514         if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
 515             while length < MAX_UPLOAD_SIZE:
 516                 data = sock.read(blocksize)
 517                 if data == '':
 518                     return
 519                 yield data
 520             raise BadRequest('Maximum size is reached')
 521
 522         # Long version (do the dechunking).
 523         data = ''
 524         while length < MAX_UPLOAD_SIZE:
 525             # Get chunk size.
 526             if hasattr(sock, 'readline'):
 527                 chunk_length = sock.readline()
 528             else:
 529                 chunk_length = ''
 530                 while chunk_length[-1:] != '\n':
 531                     chunk_length += sock.read(1)
 532                 chunk_length.strip()
 533             pos = chunk_length.find(';')
 534             if pos >= 0:
 535                 chunk_length = chunk_length[:pos]
 536             try:
 537                 chunk_length = int(chunk_length, 16)
 538             except Exception, e:
 539                 raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
 540             # Check if done.
 541             if chunk_length == 0:
 542                 if len(data) > 0:
 543                     yield data
 544                 return
 545             # Get the actual data.
 546             while chunk_length > 0:
 547                 chunk = sock.read(min(chunk_length, blocksize))
 548                 chunk_length -= len(chunk)
 549                 if length > 0:
 550                     length += len(chunk)
 551                 data += chunk
 552                 if len(data) >= blocksize:
 553                     ret = data[:blocksize]
 554                     data = data[blocksize:]
 555                     yield ret
 556             sock.read(2) # CRLF
 557         raise BadRequest('Maximum size is reached')
 558     else:
 559         if length > MAX_UPLOAD_SIZE:
 560             raise BadRequest('Maximum size is reached')
 561         while length > 0:
 562             data = sock.read(min(length, blocksize))
 563             if not data:
 564                 raise BadRequest()
 565             length -= len(data)
 566             yield data
 567
 568 class SaveToBackendHandler(FileUploadHandler):
 569     """Handle a file from an HTML form the django way."""
 570
 571     def __init__(self, request=None):
 572         super(SaveToBackendHandler, self).__init__(request)
 573         self.backend = request.backend
 574
 575     def put_data(self, length):
 576         if len(self.data) >= length:
 577             block = self.data[:length]
 578             self.file.hashmap.append(self.backend.put_block(block))
 579             self.md5.update(block)
 580             self.data = self.data[length:]
 581
 582     def new_file(self, field_name, file_name, content_type, content_length, charset=None):
 583         self.md5 = hashlib.md5()
 584         self.data = ''
 585         self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
 586         self.file.size = 0
 587         self.file.hashmap = []
 588
 589     def receive_data_chunk(self, raw_data, start):
 590         self.data += raw_data
 591         self.file.size += len(raw_data)
 592         self.put_data(self.request.backend.block_size)
 593         return None
 594
 595     def file_complete(self, file_size):
 596         l = len(self.data)
 597         if l > 0:
 598             self.put_data(l)
 599         self.file.etag = self.md5.hexdigest().lower()
 600         return self.file
 601
 602 class ObjectWrapper(object):
 603     """Return the object's data block-per-block in each iteration.
 604
 605     Read from the object using the offset and length provided in each entry of the range list.
 606     """
 607
 608     def __init__(self, backend, ranges, sizes, hashmaps, boundary):
 609         self.backend = backend
 610         self.ranges = ranges
 611         self.sizes = sizes
 612         self.hashmaps = hashmaps
 613         self.boundary = boundary
 614         self.size = sum(self.sizes)
 615
 616         self.file_index = 0
 617         self.block_index = 0
 618         self.block_hash = -1
 619         self.block = ''
 620
 621         self.range_index = -1
 622         self.offset, self.length = self.ranges[0]
 623
 624     def __iter__(self):
 625         return self
 626
 627     def part_iterator(self):
 628         if self.length > 0:
 629             # Get the file for the current offset.
 630             file_size = self.sizes[self.file_index]
 631             while self.offset >= file_size:
 632                 self.offset -= file_size
 633                 self.file_index += 1
 634                 file_size = self.sizes[self.file_index]
 635
 636             # Get the block for the current position.
 637             self.block_index = int(self.offset / self.backend.block_size)
 638             if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
 639                 self.block_hash = self.hashmaps[self.file_index][self.block_index]
 640                 try:
 641                     self.block = self.backend.get_block(self.block_hash)
 642                 except NameError:
 643                     raise ItemNotFound('Block does not exist')
 644
 645             # Get the data from the block.
 646             bo = self.offset % self.backend.block_size
 647             bl = min(self.length, len(self.block) - bo)
 648             data = self.block[bo:bo + bl]
 649             self.offset += bl
 650             self.length -= bl
 651             return data
 652         else:
 653             raise StopIteration
 654
 655     def next(self):
 656         if len(self.ranges) == 1:
 657             return self.part_iterator()
 658         if self.range_index == len(self.ranges):
 659             raise StopIteration
 660         try:
 661             if self.range_index == -1:
 662                 raise StopIteration
 663             return self.part_iterator()
 664         except StopIteration:
 665             self.range_index += 1
 666             out = []
 667             if self.range_index < len(self.ranges):
 668                 # Part header.
 669                 self.offset, self.length = self.ranges[self.range_index]
 670                 self.file_index = 0
 671                 if self.range_index > 0:
 672                     out.append('')
 673                 out.append('--' + self.boundary)
 674                 out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
 675                 out.append('Content-Transfer-Encoding: binary')
 676                 out.append('')
 677                 out.append('')
 678                 return '\r\n'.join(out)
 679             else:
 680                 # Footer.
 681                 out.append('')
 682                 out.append('--' + self.boundary + '--')
 683                 out.append('')
 684                 return '\r\n'.join(out)
 685
 686 def object_data_response(request, sizes, hashmaps, meta, public=False):
 687     """Get the HttpResponse object for replying with the object's data."""
 688
 689     # Range handling.
 690     size = sum(sizes)
 691     ranges = get_range(request, size)
 692     if ranges is None:
 693         ranges = [(0, size)]
 694         ret = 200
 695     else:
 696         check = [True for offset, length in ranges if
 697                     length <= 0 or length > size or
 698                     offset < 0 or offset >= size or
 699                     offset + length > size]
 700         if len(check) > 0:
 701             raise RangeNotSatisfiable('Requested range exceeds object limits')
 702         ret = 206
 703         if_range = request.META.get('HTTP_IF_RANGE')
 704         if if_range:
 705             try:
 706                 # Modification time has passed instead.
 707                 last_modified = parse_http_date(if_range)
 708                 if last_modified != meta['modified']:
 709                     ranges = [(0, size)]
 710                     ret = 200
 711             except ValueError:
 712                 if if_range != meta['ETag']:
 713                     ranges = [(0, size)]
 714                     ret = 200
 715
 716     if ret == 206 and len(ranges) > 1:
 717         boundary = uuid.uuid4().hex
 718     else:
 719         boundary = ''
 720     wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
 721     response = HttpResponse(wrapper, status=ret)
 722     put_object_headers(response, meta, public)
 723     if ret == 206:
 724         if len(ranges) == 1:
 725             offset, length = ranges[0]
 726             response['Content-Length'] = length # Update with the correct length.
 727             response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
 728         else:
 729             del(response['Content-Length'])
 730             response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
 731     return response
 732
 733 def put_object_block(request, hashmap, data, offset):
 734     """Put one block of data at the given offset."""
 735
 736     bi = int(offset / request.backend.block_size)
 737     bo = offset % request.backend.block_size
 738     bl = min(len(data), request.backend.block_size - bo)
 739     if bi < len(hashmap):
 740         hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
 741     else:
 742         hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
 743     return bl # Return ammount of data written.
 744
 745 #def hashmap_hash(request, hashmap):
 746 #    """Produce the root hash, treating the hashmap as a Merkle-like tree."""
 747 #
 748 #    map = HashMap(request.backend.block_size, request.backend.hash_algorithm)
 749 #    map.extend([unhexlify(x) for x in hashmap])
 750 #    return hexlify(map.hash())
 751
 752 def hashmap_md5(request, hashmap, size):
 753     """Produce the MD5 sum from the data in the hashmap."""
 754
 755     # TODO: Search backend for the MD5 of another object with the same hashmap and size...
 756     md5 = hashlib.md5()
 757     bs = request.backend.block_size
 758     for bi, hash in enumerate(hashmap):
 759         data = request.backend.get_block(hash)
 760         if bi == len(hashmap) - 1:
 761             bs = size % bs
 762         pad = bs - min(len(data), bs)
 763         md5.update(data + ('\x00' * pad))
 764     return md5.hexdigest().lower()
 765
 766 def get_backend():
 767     backend = connect_backend(db_module=settings.BACKEND_DB_MODULE,
 768                               db_connection=settings.BACKEND_DB_CONNECTION,
 769                               block_module=settings.BACKEND_BLOCK_MODULE,
 770                               block_path=settings.BACKEND_BLOCK_PATH)
 771     backend.default_policy['quota'] = settings.BACKEND_QUOTA
 772     backend.default_policy['versioning'] = settings.BACKEND_VERSIONING
 773     return backend
 774
 775 def update_request_headers(request):
 776     # Handle URL-encoded keys and values.
 777     # Handle URL-encoded keys and values.
 778     meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
 779     if len(meta) > 90:
 780         raise BadRequest('Too many headers.')
 781     for k, v in meta.iteritems():
 782         if len(k) > 128:
 783             raise BadRequest('Header name too large.')
 784         if len(v) > 256:
 785             raise BadRequest('Header value too large.')
 786         try:
 787             k.decode('ascii')
 788             v.decode('ascii')
 789         except UnicodeDecodeError:
 790             raise BadRequest('Bad character in headers.')
 791         if '%' in k or '%' in v:
 792             del(request.META[k])
 793             request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)
 794
 795 def update_response_headers(request, response):
 796     if request.serialization == 'xml':
 797         response['Content-Type'] = 'application/xml; charset=UTF-8'
 798     elif request.serialization == 'json':
 799         response['Content-Type'] = 'application/json; charset=UTF-8'
 800     elif not response['Content-Type']:
 801         response['Content-Type'] = 'text/plain; charset=UTF-8'
 802
 803     if (not response.has_header('Content-Length') and
 804         not (response.has_header('Content-Type') and
 805              response['Content-Type'].startswith('multipart/byteranges'))):
 806         response['Content-Length'] = len(response.content)
 807
 808     # URL-encode unicode in headers.
 809     meta = response.items()
 810     for k, v in meta:
 811         if (k.startswith('X-Account-') or k.startswith('X-Container-') or
 812             k.startswith('X-Object-') or k.startswith('Content-')):
 813             del(response[k])
 814             response[quote(k)] = quote(v, safe='/=,:@; ')
 815
 816     if settings.TEST:
 817         response['Date'] = format_date_time(time())
 818
 819 def render_fault(request, fault):
 820     if settings.DEBUG or settings.TEST:
 821         fault.details = format_exc(fault)
 822
 823     request.serialization = 'text'
 824     data = '\n'.join((fault.message, fault.details)) + '\n'
 825     response = HttpResponse(data, status=fault.code)
 826     update_response_headers(request, response)
 827     return response
 828
 829 def request_serialization(request, format_allowed=False):
 830     """Return the serialization format requested.
 831
 832     Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
 833     """
 834
 835     if not format_allowed:
 836         return 'text'
 837
 838     format = request.GET.get('format')
 839     if format == 'json':
 840         return 'json'
 841     elif format == 'xml':
 842         return 'xml'
 843
 844     for item in request.META.get('HTTP_ACCEPT', '').split(','):
 845         accept, sep, rest = item.strip().partition(';')
 846         if accept == 'application/json':
 847             return 'json'
 848         elif accept == 'application/xml' or accept == 'text/xml':
 849             return 'xml'
 850
 851     return 'text'
 852
 853 def api_method(http_method=None, format_allowed=False, user_required=True):
 854     """Decorator function for views that implement an API method."""
 855
 856     def decorator(func):
 857         @wraps(func)
 858         def wrapper(request, *args, **kwargs):
 859             try:
 860                 if http_method and request.method != http_method:
 861                     raise BadRequest('Method not allowed.')
 862                 if user_required and getattr(request, 'user', None) is None:
 863                     raise Unauthorized('Access denied')
 864
 865                 # The args variable may contain up to (account, container, object).
 866                 if len(args) > 1 and len(args[1]) > 256:
 867                     raise BadRequest('Container name too large.')
 868                 if len(args) > 2 and len(args[2]) > 1024:
 869                     raise BadRequest('Object name too large.')
 870
 871                 # Format and check headers.
 872                 update_request_headers(request)
 873
 874                 # Fill in custom request variables.
 875                 request.serialization = request_serialization(request, format_allowed)
 876                 request.backend = get_backend()
 877
 878                 response = func(request, *args, **kwargs)
 879                 update_response_headers(request, response)
 880                 return response
 881             except Fault, fault:
 882                 return render_fault(request, fault)
 883             except BaseException, e:
 884                 logger.exception('Unexpected error: %s' % e)
 885                 fault = ServiceUnavailable('Unexpected error')
 886                 return render_fault(request, fault)
 887             finally:
 888                 if getattr(request, 'backend', None) is not None:
 889                     request.backend.close()
 890         return wrapper
 891     return decorator