code.grnet.gr Git - pithos/blob - pithos/api/util.py

   1 # Copyright 2011 GRNET S.A. All rights reserved.
   2 #
   3 # Redistribution and use in source and binary forms, with or
   4 # without modification, are permitted provided that the following
   5 # conditions are met:
   6 #
   7 #   1. Redistributions of source code must retain the above
   8 #      copyright notice, this list of conditions and the following
   9 #      disclaimer.
  10 #
  11 #   2. Redistributions in binary form must reproduce the above
  12 #      copyright notice, this list of conditions and the following
  13 #      disclaimer in the documentation and/or other materials
  14 #      provided with the distribution.
  15 #
  16 # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
  17 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  19 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
  20 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  23 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  24 # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  26 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27 # POSSIBILITY OF SUCH DAMAGE.
  28 #
  29 # The views and conclusions contained in the software and
  30 # documentation are those of the authors and should not be
  31 # interpreted as representing official policies, either expressed
  32 # or implied, of GRNET S.A.
  33
  34 from functools import wraps
  35 from time import time
  36 from traceback import format_exc
  37 from wsgiref.handlers import format_date_time
  38 from binascii import hexlify, unhexlify
  39 from datetime import datetime, tzinfo, timedelta
  40 from urllib import quote, unquote
  41
  42 from django.conf import settings
  43 from django.http import HttpResponse
  44 from django.utils import simplejson as json
  45 from django.utils.http import http_date, parse_etags
  46 from django.utils.encoding import smart_str
  47 from django.core.files.uploadhandler import FileUploadHandler
  48 from django.core.files.uploadedfile import UploadedFile
  49
  50 from pithos.api.compat import parse_http_date_safe, parse_http_date
  51 from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
  52                                 Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
  53                                 RangeNotSatisfiable, ServiceUnavailable)
  54 from pithos.api.short_url import encode_url
  55 from pithos.backends import connect_backend
  56 from pithos.backends.base import NotAllowedError, QuotaError
  57
  58 import logging
  59 import re
  60 import hashlib
  61 import uuid
  62 import decimal
  63
  64
  65 logger = logging.getLogger(__name__)
  66
  67
  68 class UTC(tzinfo):
  69    def utcoffset(self, dt):
  70        return timedelta(0)
  71
  72    def tzname(self, dt):
  73        return 'UTC'
  74
  75    def dst(self, dt):
  76        return timedelta(0)
  77
  78 def json_encode_decimal(obj):
  79     if isinstance(obj, decimal.Decimal):
  80         return str(obj)
  81     raise TypeError(repr(obj) + " is not JSON serializable")
  82
  83 def isoformat(d):
  84    """Return an ISO8601 date string that includes a timezone."""
  85
  86    return d.replace(tzinfo=UTC()).isoformat()
  87
  88 def rename_meta_key(d, old, new):
  89     if old not in d:
  90         return
  91     d[new] = d[old]
  92     del(d[old])
  93
  94 def printable_header_dict(d):
  95     """Format a meta dictionary for printing out json/xml.
  96
  97     Convert all keys to lower case and replace dashes with underscores.
  98     Format 'last_modified' timestamp.
  99     """
 100
 101     if 'last_modified' in d:
 102         d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
 103     return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
 104
 105 def format_header_key(k):
 106     """Convert underscores to dashes and capitalize intra-dash strings."""
 107     return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
 108
 109 def get_header_prefix(request, prefix):
 110     """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""
 111
 112     prefix = 'HTTP_' + prefix.upper().replace('-', '_')
 113     # TODO: Document or remove '~' replacing.
 114     return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])
 115
 116 def get_account_headers(request):
 117     meta = get_header_prefix(request, 'X-Account-Meta-')
 118     groups = {}
 119     for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
 120         n = k[16:].lower()
 121         if '-' in n or '_' in n:
 122             raise BadRequest('Bad characters in group name')
 123         groups[n] = v.replace(' ', '').split(',')
 124         while '' in groups[n]:
 125             groups[n].remove('')
 126     return meta, groups
 127
 128 def put_account_headers(response, meta, groups, policy):
 129     if 'count' in meta:
 130         response['X-Account-Container-Count'] = meta['count']
 131     if 'bytes' in meta:
 132         response['X-Account-Bytes-Used'] = meta['bytes']
 133     response['Last-Modified'] = http_date(int(meta['modified']))
 134     for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
 135         response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 136     if 'until_timestamp' in meta:
 137         response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
 138     for k, v in groups.iteritems():
 139         k = smart_str(k, strings_only=True)
 140         k = format_header_key('X-Account-Group-' + k)
 141         v = smart_str(','.join(v), strings_only=True)
 142         response[k] = v
 143     for k, v in policy.iteritems():
 144         response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
 145
 146 def get_container_headers(request):
 147     meta = get_header_prefix(request, 'X-Container-Meta-')
 148     policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
 149     return meta, policy
 150
 151 def put_container_headers(request, response, meta, policy):
 152     if 'count' in meta:
 153         response['X-Container-Object-Count'] = meta['count']
 154     if 'bytes' in meta:
 155         response['X-Container-Bytes-Used'] = meta['bytes']
 156     response['Last-Modified'] = http_date(int(meta['modified']))
 157     for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
 158         response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 159     l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
 160     response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
 161     response['X-Container-Block-Size'] = request.backend.block_size
 162     response['X-Container-Block-Hash'] = request.backend.hash_algorithm
 163     if 'until_timestamp' in meta:
 164         response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
 165     for k, v in policy.iteritems():
 166         response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
 167
 168 def get_object_headers(request):
 169     meta = get_header_prefix(request, 'X-Object-Meta-')
 170     if request.META.get('CONTENT_TYPE'):
 171         meta['Content-Type'] = request.META['CONTENT_TYPE']
 172     if request.META.get('HTTP_CONTENT_ENCODING'):
 173         meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
 174     if request.META.get('HTTP_CONTENT_DISPOSITION'):
 175         meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
 176     if request.META.get('HTTP_X_OBJECT_MANIFEST'):
 177         meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
 178     return meta, get_sharing(request), get_public(request)
 179
 180 def put_object_headers(response, meta, restricted=False):
 181     response['ETag'] = meta['ETag']
 182     response['Content-Length'] = meta['bytes']
 183     response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
 184     response['Last-Modified'] = http_date(int(meta['modified']))
 185     if not restricted:
 186         response['X-Object-Hash'] = meta['hash']
 187         response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
 188         response['X-Object-Version'] = meta['version']
 189         response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
 190         for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
 191             response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 192         for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
 193                   'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
 194                   'X-Object-Public'):
 195             if k in meta:
 196                 response[k] = smart_str(meta[k], strings_only=True)
 197     else:
 198         for k in ('Content-Encoding', 'Content-Disposition'):
 199             if k in meta:
 200                 response[k] = meta[k]
 201
 202 def update_manifest_meta(request, v_account, meta):
 203     """Update metadata if the object has an X-Object-Manifest."""
 204
 205     if 'X-Object-Manifest' in meta:
 206         etag = ''
 207         bytes = 0
 208         try:
 209             src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
 210             objects = request.backend.list_objects(request.user_uniq, v_account,
 211                                 src_container, prefix=src_name, virtual=False)
 212             for x in objects:
 213                 src_meta = request.backend.get_object_meta(request.user_uniq,
 214                                         v_account, src_container, x[0], x[1])
 215                 etag += src_meta['ETag']
 216                 bytes += src_meta['bytes']
 217         except:
 218             # Ignore errors.
 219             return
 220         meta['bytes'] = bytes
 221         md5 = hashlib.md5()
 222         md5.update(etag)
 223         meta['ETag'] = md5.hexdigest().lower()
 224
 225 def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
 226     if permissions is None:
 227         return
 228     allowed, perm_path, perms = permissions
 229     if len(perms) == 0:
 230         return
 231     ret = []
 232     r = ','.join(perms.get('read', []))
 233     if r:
 234         ret.append('read=' + r)
 235     w = ','.join(perms.get('write', []))
 236     if w:
 237         ret.append('write=' + w)
 238     meta['X-Object-Sharing'] = '; '.join(ret)
 239     if '/'.join((v_account, v_container, v_object)) != perm_path:
 240         meta['X-Object-Shared-By'] = perm_path
 241     if request.user_uniq != v_account:
 242         meta['X-Object-Allowed-To'] = allowed
 243
 244 def update_public_meta(public, meta):
 245     if not public:
 246         return
 247     meta['X-Object-Public'] = '/public/' + encode_url(public)
 248
 249 def validate_modification_preconditions(request, meta):
 250     """Check that the modified timestamp conforms with the preconditions set."""
 251
 252     if 'modified' not in meta:
 253         return # TODO: Always return?
 254
 255     if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
 256     if if_modified_since is not None:
 257         if_modified_since = parse_http_date_safe(if_modified_since)
 258     if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
 259         raise NotModified('Resource has not been modified')
 260
 261     if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
 262     if if_unmodified_since is not None:
 263         if_unmodified_since = parse_http_date_safe(if_unmodified_since)
 264     if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
 265         raise PreconditionFailed('Resource has been modified')
 266
 267 def validate_matching_preconditions(request, meta):
 268     """Check that the ETag conforms with the preconditions set."""
 269
 270     etag = meta.get('ETag', None)
 271
 272     if_match = request.META.get('HTTP_IF_MATCH')
 273     if if_match is not None:
 274         if etag is None:
 275             raise PreconditionFailed('Resource does not exist')
 276         if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
 277             raise PreconditionFailed('Resource ETag does not match')
 278
 279     if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
 280     if if_none_match is not None:
 281         # TODO: If this passes, must ignore If-Modified-Since header.
 282         if etag is not None:
 283             if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
 284                 # TODO: Continue if an If-Modified-Since header is present.
 285                 if request.method in ('HEAD', 'GET'):
 286                     raise NotModified('Resource ETag matches')
 287                 raise PreconditionFailed('Resource exists or ETag matches')
 288
 289 def split_container_object_string(s):
 290     if not len(s) > 0 or s[0] != '/':
 291         raise ValueError
 292     s = s[1:]
 293     pos = s.find('/')
 294     if pos == -1 or pos == len(s) - 1:
 295         raise ValueError
 296     return s[:pos], s[(pos + 1):]
 297
 298 def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
 299     """Copy or move an object."""
 300
 301     meta, permissions, public = get_object_headers(request)
 302     src_version = request.META.get('HTTP_X_SOURCE_VERSION')
 303     try:
 304         if move:
 305             version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
 306                                                         dest_account, dest_container, dest_name,
 307                                                         meta, False, permissions)
 308         else:
 309             version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
 310                                                         dest_account, dest_container, dest_name,
 311                                                         meta, False, permissions, src_version)
 312     except NotAllowedError:
 313         raise Forbidden('Not allowed')
 314     except (NameError, IndexError):
 315         raise ItemNotFound('Container or object does not exist')
 316     except ValueError:
 317         raise BadRequest('Invalid sharing header')
 318     except AttributeError, e:
 319         raise Conflict('\n'.join(e.data) + '\n')
 320     except QuotaError:
 321         raise RequestEntityTooLarge('Quota exceeded')
 322     if public is not None:
 323         try:
 324             request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
 325         except NotAllowedError:
 326             raise Forbidden('Not allowed')
 327         except NameError:
 328             raise ItemNotFound('Object does not exist')
 329     return version_id
 330
 331 def get_int_parameter(p):
 332     if p is not None:
 333         try:
 334             p = int(p)
 335         except ValueError:
 336             return None
 337         if p < 0:
 338             return None
 339     return p
 340
 341 def get_content_length(request):
 342     content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
 343     if content_length is None:
 344         raise LengthRequired('Missing or invalid Content-Length header')
 345     return content_length
 346
 347 def get_range(request, size):
 348     """Parse a Range header from the request.
 349
 350     Either returns None, when the header is not existent or should be ignored,
 351     or a list of (offset, length) tuples - should be further checked.
 352     """
 353
 354     ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
 355     if not ranges.startswith('bytes='):
 356         return None
 357
 358     ret = []
 359     for r in (x.strip() for x in ranges[6:].split(',')):
 360         p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
 361         m = p.match(r)
 362         if not m:
 363             return None
 364         offset = m.group('offset')
 365         upto = m.group('upto')
 366         if offset == '' and upto == '':
 367             return None
 368
 369         if offset != '':
 370             offset = int(offset)
 371             if upto != '':
 372                 upto = int(upto)
 373                 if offset > upto:
 374                     return None
 375                 ret.append((offset, upto - offset + 1))
 376             else:
 377                 ret.append((offset, size - offset))
 378         else:
 379             length = int(upto)
 380             ret.append((size - length, length))
 381
 382     return ret
 383
 384 def get_content_range(request):
 385     """Parse a Content-Range header from the request.
 386
 387     Either returns None, when the header is not existent or should be ignored,
 388     or an (offset, length, total) tuple - check as length, total may be None.
 389     Returns (None, None, None) if the provided range is '*/*'.
 390     """
 391
 392     ranges = request.META.get('HTTP_CONTENT_RANGE', '')
 393     if not ranges:
 394         return None
 395
 396     p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
 397     m = p.match(ranges)
 398     if not m:
 399         if ranges == 'bytes */*':
 400             return (None, None, None)
 401         return None
 402     offset = int(m.group('offset'))
 403     upto = m.group('upto')
 404     total = m.group('total')
 405     if upto != '':
 406         upto = int(upto)
 407     else:
 408         upto = None
 409     if total != '*':
 410         total = int(total)
 411     else:
 412         total = None
 413     if (upto is not None and offset > upto) or \
 414         (total is not None and offset >= total) or \
 415         (total is not None and upto is not None and upto >= total):
 416         return None
 417
 418     if upto is None:
 419         length = None
 420     else:
 421         length = upto - offset + 1
 422     return (offset, length, total)
 423
 424 def get_sharing(request):
 425     """Parse an X-Object-Sharing header from the request.
 426
 427     Raises BadRequest on error.
 428     """
 429
 430     permissions = request.META.get('HTTP_X_OBJECT_SHARING')
 431     if permissions is None:
 432         return None
 433
 434     # TODO: Document or remove '~' replacing.
 435     permissions = permissions.replace('~', '')
 436
 437     ret = {}
 438     permissions = permissions.replace(' ', '')
 439     if permissions == '':
 440         return ret
 441     for perm in (x for x in permissions.split(';')):
 442         if perm.startswith('read='):
 443             ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
 444             if '' in ret['read']:
 445                 ret['read'].remove('')
 446             if '*' in ret['read']:
 447                 ret['read'] = ['*']
 448             if len(ret['read']) == 0:
 449                 raise BadRequest('Bad X-Object-Sharing header value')
 450         elif perm.startswith('write='):
 451             ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
 452             if '' in ret['write']:
 453                 ret['write'].remove('')
 454             if '*' in ret['write']:
 455                 ret['write'] = ['*']
 456             if len(ret['write']) == 0:
 457                 raise BadRequest('Bad X-Object-Sharing header value')
 458         else:
 459             raise BadRequest('Bad X-Object-Sharing header value')
 460
 461     # Keep duplicates only in write list.
 462     dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
 463     if dups:
 464         for x in dups:
 465             ret['read'].remove(x)
 466         if len(ret['read']) == 0:
 467             del(ret['read'])
 468
 469     return ret
 470
 471 def get_public(request):
 472     """Parse an X-Object-Public header from the request.
 473
 474     Raises BadRequest on error.
 475     """
 476
 477     public = request.META.get('HTTP_X_OBJECT_PUBLIC')
 478     if public is None:
 479         return None
 480
 481     public = public.replace(' ', '').lower()
 482     if public == 'true':
 483         return True
 484     elif public == 'false' or public == '':
 485         return False
 486     raise BadRequest('Bad X-Object-Public header value')
 487
 488 def raw_input_socket(request):
 489     """Return the socket for reading the rest of the request."""
 490
 491     server_software = request.META.get('SERVER_SOFTWARE')
 492     if server_software and server_software.startswith('mod_python'):
 493         return request._req
 494     if 'wsgi.input' in request.environ:
 495         return request.environ['wsgi.input']
 496     raise ServiceUnavailable('Unknown server software')
 497
 498 MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB
 499
 500 def socket_read_iterator(request, length=0, blocksize=4096):
 501     """Return a maximum of blocksize data read from the socket in each iteration.
 502
 503     Read up to 'length'. If 'length' is negative, will attempt a chunked read.
 504     The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
 505     """
 506
 507     sock = raw_input_socket(request)
 508     if length < 0: # Chunked transfers
 509         # Small version (server does the dechunking).
 510         if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
 511             while length < MAX_UPLOAD_SIZE:
 512                 data = sock.read(blocksize)
 513                 if data == '':
 514                     return
 515                 yield data
 516             raise BadRequest('Maximum size is reached')
 517
 518         # Long version (do the dechunking).
 519         data = ''
 520         while length < MAX_UPLOAD_SIZE:
 521             # Get chunk size.
 522             if hasattr(sock, 'readline'):
 523                 chunk_length = sock.readline()
 524             else:
 525                 chunk_length = ''
 526                 while chunk_length[-1:] != '\n':
 527                     chunk_length += sock.read(1)
 528                 chunk_length.strip()
 529             pos = chunk_length.find(';')
 530             if pos >= 0:
 531                 chunk_length = chunk_length[:pos]
 532             try:
 533                 chunk_length = int(chunk_length, 16)
 534             except Exception, e:
 535                 raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
 536             # Check if done.
 537             if chunk_length == 0:
 538                 if len(data) > 0:
 539                     yield data
 540                 return
 541             # Get the actual data.
 542             while chunk_length > 0:
 543                 chunk = sock.read(min(chunk_length, blocksize))
 544                 chunk_length -= len(chunk)
 545                 if length > 0:
 546                     length += len(chunk)
 547                 data += chunk
 548                 if len(data) >= blocksize:
 549                     ret = data[:blocksize]
 550                     data = data[blocksize:]
 551                     yield ret
 552             sock.read(2) # CRLF
 553         raise BadRequest('Maximum size is reached')
 554     else:
 555         if length > MAX_UPLOAD_SIZE:
 556             raise BadRequest('Maximum size is reached')
 557         while length > 0:
 558             data = sock.read(min(length, blocksize))
 559             if not data:
 560                 raise BadRequest()
 561             length -= len(data)
 562             yield data
 563
 564 class SaveToBackendHandler(FileUploadHandler):
 565     """Handle a file from an HTML form the django way."""
 566
 567     def __init__(self, request=None):
 568         super(SaveToBackendHandler, self).__init__(request)
 569         self.backend = request.backend
 570
 571     def put_data(self, length):
 572         if len(self.data) >= length:
 573             block = self.data[:length]
 574             self.file.hashmap.append(self.backend.put_block(block))
 575             self.md5.update(block)
 576             self.data = self.data[length:]
 577
 578     def new_file(self, field_name, file_name, content_type, content_length, charset=None):
 579         self.md5 = hashlib.md5()
 580         self.data = ''
 581         self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
 582         self.file.size = 0
 583         self.file.hashmap = []
 584
 585     def receive_data_chunk(self, raw_data, start):
 586         self.data += raw_data
 587         self.file.size += len(raw_data)
 588         self.put_data(self.request.backend.block_size)
 589         return None
 590
 591     def file_complete(self, file_size):
 592         l = len(self.data)
 593         if l > 0:
 594             self.put_data(l)
 595         self.file.etag = self.md5.hexdigest().lower()
 596         return self.file
 597
 598 class ObjectWrapper(object):
 599     """Return the object's data block-per-block in each iteration.
 600
 601     Read from the object using the offset and length provided in each entry of the range list.
 602     """
 603
 604     def __init__(self, backend, ranges, sizes, hashmaps, boundary):
 605         self.backend = backend
 606         self.ranges = ranges
 607         self.sizes = sizes
 608         self.hashmaps = hashmaps
 609         self.boundary = boundary
 610         self.size = sum(self.sizes)
 611
 612         self.file_index = 0
 613         self.block_index = 0
 614         self.block_hash = -1
 615         self.block = ''
 616
 617         self.range_index = -1
 618         self.offset, self.length = self.ranges[0]
 619
 620     def __iter__(self):
 621         return self
 622
 623     def part_iterator(self):
 624         if self.length > 0:
 625             # Get the file for the current offset.
 626             file_size = self.sizes[self.file_index]
 627             while self.offset >= file_size:
 628                 self.offset -= file_size
 629                 self.file_index += 1
 630                 file_size = self.sizes[self.file_index]
 631
 632             # Get the block for the current position.
 633             self.block_index = int(self.offset / self.backend.block_size)
 634             if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
 635                 self.block_hash = self.hashmaps[self.file_index][self.block_index]
 636                 try:
 637                     self.block = self.backend.get_block(self.block_hash)
 638                 except NameError:
 639                     raise ItemNotFound('Block does not exist')
 640
 641             # Get the data from the block.
 642             bo = self.offset % self.backend.block_size
 643             bl = min(self.length, len(self.block) - bo)
 644             data = self.block[bo:bo + bl]
 645             self.offset += bl
 646             self.length -= bl
 647             return data
 648         else:
 649             raise StopIteration
 650
 651     def next(self):
 652         if len(self.ranges) == 1:
 653             return self.part_iterator()
 654         if self.range_index == len(self.ranges):
 655             raise StopIteration
 656         try:
 657             if self.range_index == -1:
 658                 raise StopIteration
 659             return self.part_iterator()
 660         except StopIteration:
 661             self.range_index += 1
 662             out = []
 663             if self.range_index < len(self.ranges):
 664                 # Part header.
 665                 self.offset, self.length = self.ranges[self.range_index]
 666                 self.file_index = 0
 667                 if self.range_index > 0:
 668                     out.append('')
 669                 out.append('--' + self.boundary)
 670                 out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
 671                 out.append('Content-Transfer-Encoding: binary')
 672                 out.append('')
 673                 out.append('')
 674                 return '\r\n'.join(out)
 675             else:
 676                 # Footer.
 677                 out.append('')
 678                 out.append('--' + self.boundary + '--')
 679                 out.append('')
 680                 return '\r\n'.join(out)
 681
 682 def object_data_response(request, sizes, hashmaps, meta, public=False):
 683     """Get the HttpResponse object for replying with the object's data."""
 684
 685     # Range handling.
 686     size = sum(sizes)
 687     ranges = get_range(request, size)
 688     if ranges is None:
 689         ranges = [(0, size)]
 690         ret = 200
 691     else:
 692         check = [True for offset, length in ranges if
 693                     length <= 0 or length > size or
 694                     offset < 0 or offset >= size or
 695                     offset + length > size]
 696         if len(check) > 0:
 697             raise RangeNotSatisfiable('Requested range exceeds object limits')
 698         ret = 206
 699         if_range = request.META.get('HTTP_IF_RANGE')
 700         if if_range:
 701             try:
 702                 # Modification time has passed instead.
 703                 last_modified = parse_http_date(if_range)
 704                 if last_modified != meta['modified']:
 705                     ranges = [(0, size)]
 706                     ret = 200
 707             except ValueError:
 708                 if if_range != meta['ETag']:
 709                     ranges = [(0, size)]
 710                     ret = 200
 711
 712     if ret == 206 and len(ranges) > 1:
 713         boundary = uuid.uuid4().hex
 714     else:
 715         boundary = ''
 716     wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
 717     response = HttpResponse(wrapper, status=ret)
 718     put_object_headers(response, meta, public)
 719     if ret == 206:
 720         if len(ranges) == 1:
 721             offset, length = ranges[0]
 722             response['Content-Length'] = length # Update with the correct length.
 723             response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
 724         else:
 725             del(response['Content-Length'])
 726             response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
 727     return response
 728
 729 def put_object_block(request, hashmap, data, offset):
 730     """Put one block of data at the given offset."""
 731
 732     bi = int(offset / request.backend.block_size)
 733     bo = offset % request.backend.block_size
 734     bl = min(len(data), request.backend.block_size - bo)
 735     if bi < len(hashmap):
 736         hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
 737     else:
 738         hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
 739     return bl # Return ammount of data written.
 740
 741 def hashmap_hash(request, hashmap):
 742     """Produce the root hash, treating the hashmap as a Merkle-like tree."""
 743
 744     def subhash(d):
 745         h = hashlib.new(request.backend.hash_algorithm)
 746         h.update(d)
 747         return h.digest()
 748
 749     if len(hashmap) == 0:
 750         return hexlify(subhash(''))
 751     if len(hashmap) == 1:
 752         return hashmap[0]
 753
 754     s = 2
 755     while s < len(hashmap):
 756         s = s * 2
 757     h = [unhexlify(x) for x in hashmap]
 758     h += [('\x00' * len(h[0]))] * (s - len(hashmap))
 759     while len(h) > 1:
 760         h = [subhash(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
 761     return hexlify(h[0])
 762
 763 def update_request_headers(request):
 764     # Handle URL-encoded keys and values.
 765     # Handle URL-encoded keys and values.
 766     meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
 767     if len(meta) > 90:
 768         raise BadRequest('Too many headers.')
 769     for k, v in meta.iteritems():
 770         if len(k) > 128:
 771             raise BadRequest('Header name too large.')
 772         if len(v) > 256:
 773             raise BadRequest('Header value too large.')
 774         try:
 775             k.decode('ascii')
 776             v.decode('ascii')
 777         except UnicodeDecodeError:
 778             raise BadRequest('Bad character in headers.')
 779         if '%' in k or '%' in v:
 780             del(request.META[k])
 781             request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)
 782
 783 def update_response_headers(request, response):
 784     if request.serialization == 'xml':
 785         response['Content-Type'] = 'application/xml; charset=UTF-8'
 786     elif request.serialization == 'json':
 787         response['Content-Type'] = 'application/json; charset=UTF-8'
 788     elif not response['Content-Type']:
 789         response['Content-Type'] = 'text/plain; charset=UTF-8'
 790
 791     if (not response.has_header('Content-Length') and
 792         not (response.has_header('Content-Type') and
 793              response['Content-Type'].startswith('multipart/byteranges'))):
 794         response['Content-Length'] = len(response.content)
 795
 796     # URL-encode unicode in headers.
 797     meta = response.items()
 798     for k, v in meta:
 799         if (k.startswith('X-Account-') or k.startswith('X-Container-') or
 800             k.startswith('X-Object-') or k.startswith('Content-')):
 801             del(response[k])
 802             response[quote(k)] = quote(v, safe='/=,:@; ')
 803
 804     if settings.TEST:
 805         response['Date'] = format_date_time(time())
 806
 807 def render_fault(request, fault):
 808     if settings.DEBUG or settings.TEST:
 809         fault.details = format_exc(fault)
 810
 811     request.serialization = 'text'
 812     data = '\n'.join((fault.message, fault.details)) + '\n'
 813     response = HttpResponse(data, status=fault.code)
 814     update_response_headers(request, response)
 815     return response
 816
 817 def request_serialization(request, format_allowed=False):
 818     """Return the serialization format requested.
 819
 820     Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
 821     """
 822
 823     if not format_allowed:
 824         return 'text'
 825
 826     format = request.GET.get('format')
 827     if format == 'json':
 828         return 'json'
 829     elif format == 'xml':
 830         return 'xml'
 831
 832     for item in request.META.get('HTTP_ACCEPT', '').split(','):
 833         accept, sep, rest = item.strip().partition(';')
 834         if accept == 'application/json':
 835             return 'json'
 836         elif accept == 'application/xml' or accept == 'text/xml':
 837             return 'xml'
 838
 839     return 'text'
 840
 841 def api_method(http_method=None, format_allowed=False, user_required=True):
 842     """Decorator function for views that implement an API method."""
 843
 844     def decorator(func):
 845         @wraps(func)
 846         def wrapper(request, *args, **kwargs):
 847             try:
 848                 if http_method and request.method != http_method:
 849                     raise BadRequest('Method not allowed.')
 850                 if user_required and getattr(request, 'user', None) is None:
 851                     raise Unauthorized('Access denied')
 852
 853                 # The args variable may contain up to (account, container, object).
 854                 if len(args) > 1 and len(args[1]) > 256:
 855                     raise BadRequest('Container name too large.')
 856                 if len(args) > 2 and len(args[2]) > 1024:
 857                     raise BadRequest('Object name too large.')
 858
 859                 # Format and check headers.
 860                 update_request_headers(request)
 861
 862                 # Fill in custom request variables.
 863                 request.serialization = request_serialization(request, format_allowed)
 864                 request.backend = connect_backend()
 865
 866                 response = func(request, *args, **kwargs)
 867                 update_response_headers(request, response)
 868                 return response
 869             except Fault, fault:
 870                 return render_fault(request, fault)
 871             except BaseException, e:
 872                 logger.exception('Unexpected error: %s' % e)
 873                 fault = ServiceUnavailable('Unexpected error')
 874                 return render_fault(request, fault)
 875             finally:
 876                 if getattr(request, 'backend', None) is not None:
 877                     request.backend.close()
 878         return wrapper
 879     return decorator