code.grnet.gr Git - pithos/blob - snf-pithos-app/pithos/api/util.py

   1 # Copyright 2011-2012 GRNET S.A. All rights reserved.
   2 #
   3 # Redistribution and use in source and binary forms, with or
   4 # without modification, are permitted provided that the following
   5 # conditions are met:
   6 #
   7 #   1. Redistributions of source code must retain the above
   8 #      copyright notice, this list of conditions and the following
   9 #      disclaimer.
  10 #
  11 #   2. Redistributions in binary form must reproduce the above
  12 #      copyright notice, this list of conditions and the following
  13 #      disclaimer in the documentation and/or other materials
  14 #      provided with the distribution.
  15 #
  16 # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
  17 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  19 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
  20 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  23 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  24 # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  26 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27 # POSSIBILITY OF SUCH DAMAGE.
  28 #
  29 # The views and conclusions contained in the software and
  30 # documentation are those of the authors and should not be
  31 # interpreted as representing official policies, either expressed
  32 # or implied, of GRNET S.A.
  33
  34 from functools import wraps
  35 from time import time
  36 from traceback import format_exc
  37 from wsgiref.handlers import format_date_time
  38 from binascii import hexlify, unhexlify
  39 from datetime import datetime, tzinfo, timedelta
  40 from urllib import quote, unquote
  41
  42 from django.conf import settings
  43 from django.http import HttpResponse
  44 from django.utils import simplejson as json
  45 from django.utils.http import http_date, parse_etags
  46 from django.utils.encoding import smart_unicode, smart_str
  47 from django.core.files.uploadhandler import FileUploadHandler
  48 from django.core.files.uploadedfile import UploadedFile
  49
  50 from pithos.lib.compat import parse_http_date_safe, parse_http_date
  51
  52 from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
  53                                 Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
  54                                 RangeNotSatisfiable, InternalServerError, NotImplemented)
  55 from pithos.api.short_url import encode_url
  56 from pithos.backends import connect_backend
  57 from pithos.backends.base import NotAllowedError, QuotaError
  58
  59 import logging
  60 import re
  61 import hashlib
  62 import uuid
  63 import decimal
  64
  65
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
  67
  68
  69 class UTC(tzinfo):
  70    def utcoffset(self, dt):
  71        return timedelta(0)
  72
  73    def tzname(self, dt):
  74        return 'UTC'
  75
  76    def dst(self, dt):
  77        return timedelta(0)
  78
  79 def json_encode_decimal(obj):
  80     if isinstance(obj, decimal.Decimal):
  81         return str(obj)
  82     raise TypeError(repr(obj) + " is not JSON serializable")
  83
  84 def isoformat(d):
  85    """Return an ISO8601 date string that includes a timezone."""
  86
  87    return d.replace(tzinfo=UTC()).isoformat()
  88
  89 def rename_meta_key(d, old, new):
  90     if old not in d:
  91         return
  92     d[new] = d[old]
  93     del(d[old])
  94
  95 def printable_header_dict(d):
  96     """Format a meta dictionary for printing out json/xml.
  97
  98     Convert all keys to lower case and replace dashes with underscores.
  99     Format 'last_modified' timestamp.
 100     """
 101
 102     if 'last_modified' in d:
 103         d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
 104     return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
 105
 106 def format_header_key(k):
 107     """Convert underscores to dashes and capitalize intra-dash strings."""
 108     return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
 109
 110 def get_header_prefix(request, prefix):
 111     """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""
 112
 113     prefix = 'HTTP_' + prefix.upper().replace('-', '_')
 114     # TODO: Document or remove '~' replacing.
 115     return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])
 116
 117 def get_account_headers(request):
 118     meta = get_header_prefix(request, 'X-Account-Meta-')
 119     groups = {}
 120     for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
 121         n = k[16:].lower()
 122         if '-' in n or '_' in n:
 123             raise BadRequest('Bad characters in group name')
 124         groups[n] = v.replace(' ', '').split(',')
 125         while '' in groups[n]:
 126             groups[n].remove('')
 127     return meta, groups
 128
 129 def put_account_headers(response, meta, groups, policy):
 130     if 'count' in meta:
 131         response['X-Account-Container-Count'] = meta['count']
 132     if 'bytes' in meta:
 133         response['X-Account-Bytes-Used'] = meta['bytes']
 134     response['Last-Modified'] = http_date(int(meta['modified']))
 135     for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
 136         response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 137     if 'until_timestamp' in meta:
 138         response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
 139     for k, v in groups.iteritems():
 140         k = smart_str(k, strings_only=True)
 141         k = format_header_key('X-Account-Group-' + k)
 142         v = smart_str(','.join(v), strings_only=True)
 143         response[k] = v
 144     for k, v in policy.iteritems():
 145         response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
 146
 147 def get_container_headers(request):
 148     meta = get_header_prefix(request, 'X-Container-Meta-')
 149     policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
 150     return meta, policy
 151
 152 def put_container_headers(request, response, meta, policy):
 153     if 'count' in meta:
 154         response['X-Container-Object-Count'] = meta['count']
 155     if 'bytes' in meta:
 156         response['X-Container-Bytes-Used'] = meta['bytes']
 157     response['Last-Modified'] = http_date(int(meta['modified']))
 158     for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
 159         response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 160     l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
 161     response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
 162     response['X-Container-Block-Size'] = request.backend.block_size
 163     response['X-Container-Block-Hash'] = request.backend.hash_algorithm
 164     if 'until_timestamp' in meta:
 165         response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
 166     for k, v in policy.iteritems():
 167         response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
 168
 169 def get_object_headers(request):
 170     meta = get_header_prefix(request, 'X-Object-Meta-')
 171     if request.META.get('CONTENT_TYPE'):
 172         meta['Content-Type'] = request.META['CONTENT_TYPE']
 173     if request.META.get('HTTP_CONTENT_ENCODING'):
 174         meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
 175     if request.META.get('HTTP_CONTENT_DISPOSITION'):
 176         meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
 177     if request.META.get('HTTP_X_OBJECT_MANIFEST'):
 178         meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
 179     return meta, get_sharing(request), get_public(request)
 180
 181 def put_object_headers(response, meta, restricted=False):
 182     if 'ETag' in meta:
 183         response['ETag'] = meta['ETag']
 184     response['Content-Length'] = meta['bytes']
 185     response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
 186     response['Last-Modified'] = http_date(int(meta['modified']))
 187     if not restricted:
 188         response['X-Object-Hash'] = meta['hash']
 189         response['X-Object-UUID'] = meta['uuid']
 190         response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
 191         response['X-Object-Version'] = meta['version']
 192         response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
 193         for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
 194             response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 195         for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
 196                   'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
 197                   'X-Object-Public'):
 198             if k in meta:
 199                 response[k] = smart_str(meta[k], strings_only=True)
 200     else:
 201         for k in ('Content-Encoding', 'Content-Disposition'):
 202             if k in meta:
 203                 response[k] = smart_str(meta[k], strings_only=True)
 204
 205 def update_manifest_meta(request, v_account, meta):
 206     """Update metadata if the object has an X-Object-Manifest."""
 207
 208     if 'X-Object-Manifest' in meta:
 209         etag = ''
 210         bytes = 0
 211         try:
 212             src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
 213             objects = request.backend.list_objects(request.user_uniq, v_account,
 214                                 src_container, prefix=src_name, virtual=False)
 215             for x in objects:
 216                 src_meta = request.backend.get_object_meta(request.user_uniq,
 217                                         v_account, src_container, x[0], 'pithos', x[1])
 218                 if 'ETag' in src_meta:
 219                     etag += src_meta['ETag']
 220                 bytes += src_meta['bytes']
 221         except:
 222             # Ignore errors.
 223             return
 224         meta['bytes'] = bytes
 225         md5 = hashlib.md5()
 226         md5.update(etag)
 227         meta['ETag'] = md5.hexdigest().lower()
 228
 229 def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
 230     if permissions is None:
 231         return
 232     allowed, perm_path, perms = permissions
 233     if len(perms) == 0:
 234         return
 235     ret = []
 236     r = ','.join(perms.get('read', []))
 237     if r:
 238         ret.append('read=' + r)
 239     w = ','.join(perms.get('write', []))
 240     if w:
 241         ret.append('write=' + w)
 242     meta['X-Object-Sharing'] = '; '.join(ret)
 243     if '/'.join((v_account, v_container, v_object)) != perm_path:
 244         meta['X-Object-Shared-By'] = perm_path
 245     if request.user_uniq != v_account:
 246         meta['X-Object-Allowed-To'] = allowed
 247
 248 def update_public_meta(public, meta):
 249     if not public:
 250         return
 251     meta['X-Object-Public'] = '/public/' + encode_url(public)
 252
 253 def validate_modification_preconditions(request, meta):
 254     """Check that the modified timestamp conforms with the preconditions set."""
 255
 256     if 'modified' not in meta:
 257         return # TODO: Always return?
 258
 259     if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
 260     if if_modified_since is not None:
 261         if_modified_since = parse_http_date_safe(if_modified_since)
 262     if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
 263         raise NotModified('Resource has not been modified')
 264
 265     if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
 266     if if_unmodified_since is not None:
 267         if_unmodified_since = parse_http_date_safe(if_unmodified_since)
 268     if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
 269         raise PreconditionFailed('Resource has been modified')
 270
 271 def validate_matching_preconditions(request, meta):
 272     """Check that the ETag conforms with the preconditions set."""
 273
 274     etag = meta.get('ETag', None)
 275
 276     if_match = request.META.get('HTTP_IF_MATCH')
 277     if if_match is not None:
 278         if etag is None:
 279             raise PreconditionFailed('Resource does not exist')
 280         if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
 281             raise PreconditionFailed('Resource ETag does not match')
 282
 283     if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
 284     if if_none_match is not None:
 285         # TODO: If this passes, must ignore If-Modified-Since header.
 286         if etag is not None:
 287             if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
 288                 # TODO: Continue if an If-Modified-Since header is present.
 289                 if request.method in ('HEAD', 'GET'):
 290                     raise NotModified('Resource ETag matches')
 291                 raise PreconditionFailed('Resource exists or ETag matches')
 292
 293 def split_container_object_string(s):
 294     if not len(s) > 0 or s[0] != '/':
 295         raise ValueError
 296     s = s[1:]
 297     pos = s.find('/')
 298     if pos == -1 or pos == len(s) - 1:
 299         raise ValueError
 300     return s[:pos], s[(pos + 1):]
 301
 302 def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
 303     """Copy or move an object."""
 304
 305     meta, permissions, public = get_object_headers(request)
 306     src_version = request.META.get('HTTP_X_SOURCE_VERSION')
 307     try:
 308         if move:
 309             version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
 310                                                         dest_account, dest_container, dest_name,
 311                                                         'pithos', meta, False, permissions)
 312         else:
 313             version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
 314                                                         dest_account, dest_container, dest_name,
 315                                                         'pithos', meta, False, permissions, src_version)
 316     except NotAllowedError:
 317         raise Forbidden('Not allowed')
 318     except (NameError, IndexError):
 319         raise ItemNotFound('Container or object does not exist')
 320     except ValueError:
 321         raise BadRequest('Invalid sharing header')
 322     except AttributeError, e:
 323         raise Conflict('\n'.join(e.data) + '\n')
 324     except QuotaError:
 325         raise RequestEntityTooLarge('Quota exceeded')
 326     if public is not None:
 327         try:
 328             request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
 329         except NotAllowedError:
 330             raise Forbidden('Not allowed')
 331         except NameError:
 332             raise ItemNotFound('Object does not exist')
 333     return version_id
 334
 335 def get_int_parameter(p):
 336     if p is not None:
 337         try:
 338             p = int(p)
 339         except ValueError:
 340             return None
 341         if p < 0:
 342             return None
 343     return p
 344
 345 def get_content_length(request):
 346     content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
 347     if content_length is None:
 348         raise LengthRequired('Missing or invalid Content-Length header')
 349     return content_length
 350
 351 def get_range(request, size):
 352     """Parse a Range header from the request.
 353
 354     Either returns None, when the header is not existent or should be ignored,
 355     or a list of (offset, length) tuples - should be further checked.
 356     """
 357
 358     ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
 359     if not ranges.startswith('bytes='):
 360         return None
 361
 362     ret = []
 363     for r in (x.strip() for x in ranges[6:].split(',')):
 364         p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
 365         m = p.match(r)
 366         if not m:
 367             return None
 368         offset = m.group('offset')
 369         upto = m.group('upto')
 370         if offset == '' and upto == '':
 371             return None
 372
 373         if offset != '':
 374             offset = int(offset)
 375             if upto != '':
 376                 upto = int(upto)
 377                 if offset > upto:
 378                     return None
 379                 ret.append((offset, upto - offset + 1))
 380             else:
 381                 ret.append((offset, size - offset))
 382         else:
 383             length = int(upto)
 384             ret.append((size - length, length))
 385
 386     return ret
 387
 388 def get_content_range(request):
 389     """Parse a Content-Range header from the request.
 390
 391     Either returns None, when the header is not existent or should be ignored,
 392     or an (offset, length, total) tuple - check as length, total may be None.
 393     Returns (None, None, None) if the provided range is '*/*'.
 394     """
 395
 396     ranges = request.META.get('HTTP_CONTENT_RANGE', '')
 397     if not ranges:
 398         return None
 399
 400     p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
 401     m = p.match(ranges)
 402     if not m:
 403         if ranges == 'bytes */*':
 404             return (None, None, None)
 405         return None
 406     offset = int(m.group('offset'))
 407     upto = m.group('upto')
 408     total = m.group('total')
 409     if upto != '':
 410         upto = int(upto)
 411     else:
 412         upto = None
 413     if total != '*':
 414         total = int(total)
 415     else:
 416         total = None
 417     if (upto is not None and offset > upto) or \
 418         (total is not None and offset >= total) or \
 419         (total is not None and upto is not None and upto >= total):
 420         return None
 421
 422     if upto is None:
 423         length = None
 424     else:
 425         length = upto - offset + 1
 426     return (offset, length, total)
 427
 428 def get_sharing(request):
 429     """Parse an X-Object-Sharing header from the request.
 430
 431     Raises BadRequest on error.
 432     """
 433
 434     permissions = request.META.get('HTTP_X_OBJECT_SHARING')
 435     if permissions is None:
 436         return None
 437
 438     # TODO: Document or remove '~' replacing.
 439     permissions = permissions.replace('~', '')
 440
 441     ret = {}
 442     permissions = permissions.replace(' ', '')
 443     if permissions == '':
 444         return ret
 445     for perm in (x for x in permissions.split(';')):
 446         if perm.startswith('read='):
 447             ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
 448             if '' in ret['read']:
 449                 ret['read'].remove('')
 450             if '*' in ret['read']:
 451                 ret['read'] = ['*']
 452             if len(ret['read']) == 0:
 453                 raise BadRequest('Bad X-Object-Sharing header value')
 454         elif perm.startswith('write='):
 455             ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
 456             if '' in ret['write']:
 457                 ret['write'].remove('')
 458             if '*' in ret['write']:
 459                 ret['write'] = ['*']
 460             if len(ret['write']) == 0:
 461                 raise BadRequest('Bad X-Object-Sharing header value')
 462         else:
 463             raise BadRequest('Bad X-Object-Sharing header value')
 464
 465     # Keep duplicates only in write list.
 466     dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
 467     if dups:
 468         for x in dups:
 469             ret['read'].remove(x)
 470         if len(ret['read']) == 0:
 471             del(ret['read'])
 472
 473     return ret
 474
 475 def get_public(request):
 476     """Parse an X-Object-Public header from the request.
 477
 478     Raises BadRequest on error.
 479     """
 480
 481     public = request.META.get('HTTP_X_OBJECT_PUBLIC')
 482     if public is None:
 483         return None
 484
 485     public = public.replace(' ', '').lower()
 486     if public == 'true':
 487         return True
 488     elif public == 'false' or public == '':
 489         return False
 490     raise BadRequest('Bad X-Object-Public header value')
 491
 492 def raw_input_socket(request):
 493     """Return the socket for reading the rest of the request."""
 494
 495     server_software = request.META.get('SERVER_SOFTWARE')
 496     if server_software and server_software.startswith('mod_python'):
 497         return request._req
 498     if 'wsgi.input' in request.environ:
 499         return request.environ['wsgi.input']
 500     raise NotImplemented('Unknown server software')
 501
 502 MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB
 503
 504 def socket_read_iterator(request, length=0, blocksize=4096):
 505     """Return a maximum of blocksize data read from the socket in each iteration.
 506
 507     Read up to 'length'. If 'length' is negative, will attempt a chunked read.
 508     The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
 509     """
 510
 511     sock = raw_input_socket(request)
 512     if length < 0: # Chunked transfers
 513         # Small version (server does the dechunking).
 514         if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
 515             while length < MAX_UPLOAD_SIZE:
 516                 data = sock.read(blocksize)
 517                 if data == '':
 518                     return
 519                 yield data
 520             raise BadRequest('Maximum size is reached')
 521
 522         # Long version (do the dechunking).
 523         data = ''
 524         while length < MAX_UPLOAD_SIZE:
 525             # Get chunk size.
 526             if hasattr(sock, 'readline'):
 527                 chunk_length = sock.readline()
 528             else:
 529                 chunk_length = ''
 530                 while chunk_length[-1:] != '\n':
 531                     chunk_length += sock.read(1)
 532                 chunk_length.strip()
 533             pos = chunk_length.find(';')
 534             if pos >= 0:
 535                 chunk_length = chunk_length[:pos]
 536             try:
 537                 chunk_length = int(chunk_length, 16)
 538             except Exception, e:
 539                 raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
 540             # Check if done.
 541             if chunk_length == 0:
 542                 if len(data) > 0:
 543                     yield data
 544                 return
 545             # Get the actual data.
 546             while chunk_length > 0:
 547                 chunk = sock.read(min(chunk_length, blocksize))
 548                 chunk_length -= len(chunk)
 549                 if length > 0:
 550                     length += len(chunk)
 551                 data += chunk
 552                 if len(data) >= blocksize:
 553                     ret = data[:blocksize]
 554                     data = data[blocksize:]
 555                     yield ret
 556             sock.read(2) # CRLF
 557         raise BadRequest('Maximum size is reached')
 558     else:
 559         if length > MAX_UPLOAD_SIZE:
 560             raise BadRequest('Maximum size is reached')
 561         while length > 0:
 562             data = sock.read(min(length, blocksize))
 563             if not data:
 564                 raise BadRequest()
 565             length -= len(data)
 566             yield data
 567
 568 class SaveToBackendHandler(FileUploadHandler):
 569     """Handle a file from an HTML form the django way."""
 570
 571     def __init__(self, request=None):
 572         super(SaveToBackendHandler, self).__init__(request)
 573         self.backend = request.backend
 574
 575     def put_data(self, length):
 576         if len(self.data) >= length:
 577             block = self.data[:length]
 578             self.file.hashmap.append(self.backend.put_block(block))
 579             self.md5.update(block)
 580             self.data = self.data[length:]
 581
 582     def new_file(self, field_name, file_name, content_type, content_length, charset=None):
 583         self.md5 = hashlib.md5()
 584         self.data = ''
 585         self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
 586         self.file.size = 0
 587         self.file.hashmap = []
 588
 589     def receive_data_chunk(self, raw_data, start):
 590         self.data += raw_data
 591         self.file.size += len(raw_data)
 592         self.put_data(self.request.backend.block_size)
 593         return None
 594
 595     def file_complete(self, file_size):
 596         l = len(self.data)
 597         if l > 0:
 598             self.put_data(l)
 599         self.file.etag = self.md5.hexdigest().lower()
 600         return self.file
 601
 602 class ObjectWrapper(object):
 603     """Return the object's data block-per-block in each iteration.
 604
 605     Read from the object using the offset and length provided in each entry of the range list.
 606     """
 607
 608     def __init__(self, backend, ranges, sizes, hashmaps, boundary):
 609         self.backend = backend
 610         self.ranges = ranges
 611         self.sizes = sizes
 612         self.hashmaps = hashmaps
 613         self.boundary = boundary
 614         self.size = sum(self.sizes)
 615
 616         self.file_index = 0
 617         self.block_index = 0
 618         self.block_hash = -1
 619         self.block = ''
 620
 621         self.range_index = -1
 622         self.offset, self.length = self.ranges[0]
 623
 624     def __iter__(self):
 625         return self
 626
 627     def part_iterator(self):
 628         if self.length > 0:
 629             # Get the file for the current offset.
 630             file_size = self.sizes[self.file_index]
 631             while self.offset >= file_size:
 632                 self.offset -= file_size
 633                 self.file_index += 1
 634                 file_size = self.sizes[self.file_index]
 635
 636             # Get the block for the current position.
 637             self.block_index = int(self.offset / self.backend.block_size)
 638             if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
 639                 self.block_hash = self.hashmaps[self.file_index][self.block_index]
 640                 try:
 641                     self.block = self.backend.get_block(self.block_hash)
 642                 except NameError:
 643                     raise ItemNotFound('Block does not exist')
 644
 645             # Get the data from the block.
 646             bo = self.offset % self.backend.block_size
 647             bl = min(self.length, len(self.block) - bo)
 648             data = self.block[bo:bo + bl]
 649             self.offset += bl
 650             self.length -= bl
 651             return data
 652         else:
 653             raise StopIteration
 654
 655     def next(self):
 656         if len(self.ranges) == 1:
 657             return self.part_iterator()
 658         if self.range_index == len(self.ranges):
 659             raise StopIteration
 660         try:
 661             if self.range_index == -1:
 662                 raise StopIteration
 663             return self.part_iterator()
 664         except StopIteration:
 665             self.range_index += 1
 666             out = []
 667             if self.range_index < len(self.ranges):
 668                 # Part header.
 669                 self.offset, self.length = self.ranges[self.range_index]
 670                 self.file_index = 0
 671                 if self.range_index > 0:
 672                     out.append('')
 673                 out.append('--' + self.boundary)
 674                 out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
 675                 out.append('Content-Transfer-Encoding: binary')
 676                 out.append('')
 677                 out.append('')
 678                 return '\r\n'.join(out)
 679             else:
 680                 # Footer.
 681                 out.append('')
 682                 out.append('--' + self.boundary + '--')
 683                 out.append('')
 684                 return '\r\n'.join(out)
 685
 686 def object_data_response(request, sizes, hashmaps, meta, public=False):
 687     """Get the HttpResponse object for replying with the object's data."""
 688
 689     # Range handling.
 690     size = sum(sizes)
 691     ranges = get_range(request, size)
 692     if ranges is None:
 693         ranges = [(0, size)]
 694         ret = 200
 695     else:
 696         check = [True for offset, length in ranges if
 697                     length <= 0 or length > size or
 698                     offset < 0 or offset >= size or
 699                     offset + length > size]
 700         if len(check) > 0:
 701             raise RangeNotSatisfiable('Requested range exceeds object limits')
 702         ret = 206
 703         if_range = request.META.get('HTTP_IF_RANGE')
 704         if if_range:
 705             try:
 706                 # Modification time has passed instead.
 707                 last_modified = parse_http_date(if_range)
 708                 if last_modified != meta['modified']:
 709                     ranges = [(0, size)]
 710                     ret = 200
 711             except ValueError:
 712                 if if_range != meta['ETag']:
 713                     ranges = [(0, size)]
 714                     ret = 200
 715
 716     if ret == 206 and len(ranges) > 1:
 717         boundary = uuid.uuid4().hex
 718     else:
 719         boundary = ''
 720     wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
 721     response = HttpResponse(wrapper, status=ret)
 722     put_object_headers(response, meta, public)
 723     if ret == 206:
 724         if len(ranges) == 1:
 725             offset, length = ranges[0]
 726             response['Content-Length'] = length # Update with the correct length.
 727             response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
 728         else:
 729             del(response['Content-Length'])
 730             response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
 731     return response
 732
 733 def put_object_block(request, hashmap, data, offset):
 734     """Put one block of data at the given offset."""
 735
 736     bi = int(offset / request.backend.block_size)
 737     bo = offset % request.backend.block_size
 738     bl = min(len(data), request.backend.block_size - bo)
 739     if bi < len(hashmap):
 740         hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
 741     else:
 742         hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
 743     return bl # Return ammount of data written.
 744
 745 def hashmap_md5(request, hashmap, size):
 746     """Produce the MD5 sum from the data in the hashmap."""
 747
 748     # TODO: Search backend for the MD5 of another object with the same hashmap and size...
 749     md5 = hashlib.md5()
 750     bs = request.backend.block_size
 751     for bi, hash in enumerate(hashmap):
 752         data = request.backend.get_block(hash)
 753         if bi == len(hashmap) - 1:
 754             bs = size % bs
 755         pad = bs - min(len(data), bs)
 756         md5.update(data + ('\x00' * pad))
 757     return md5.hexdigest().lower()
 758
 759 def get_backend():
 760     backend = connect_backend(db_module=settings.BACKEND_DB_MODULE,
 761                               db_connection=settings.BACKEND_DB_CONNECTION,
 762                               block_module=settings.BACKEND_BLOCK_MODULE,
 763                               block_path=settings.BACKEND_BLOCK_PATH)
 764     backend.default_policy['quota'] = settings.BACKEND_QUOTA
 765     backend.default_policy['versioning'] = settings.BACKEND_VERSIONING
 766     return backend
 767
 768 def update_request_headers(request):
 769     # Handle URL-encoded keys and values.
 770     # Handle URL-encoded keys and values.
 771     meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
 772     if len(meta) > 90:
 773         raise BadRequest('Too many headers.')
 774     for k, v in meta.iteritems():
 775         if len(k) > 128:
 776             raise BadRequest('Header name too large.')
 777         if len(v) > 256:
 778             raise BadRequest('Header value too large.')
 779         try:
 780             k.decode('ascii')
 781             v.decode('ascii')
 782         except UnicodeDecodeError:
 783             raise BadRequest('Bad character in headers.')
 784         if '%' in k or '%' in v:
 785             del(request.META[k])
 786             request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)
 787
 788 def update_response_headers(request, response):
 789     if request.serialization == 'xml':
 790         response['Content-Type'] = 'application/xml; charset=UTF-8'
 791     elif request.serialization == 'json':
 792         response['Content-Type'] = 'application/json; charset=UTF-8'
 793     elif not response['Content-Type']:
 794         response['Content-Type'] = 'text/plain; charset=UTF-8'
 795
 796     if (not response.has_header('Content-Length') and
 797         not (response.has_header('Content-Type') and
 798              response['Content-Type'].startswith('multipart/byteranges'))):
 799         response['Content-Length'] = len(response.content)
 800
 801     # URL-encode unicode in headers.
 802     meta = response.items()
 803     for k, v in meta:
 804         if (k.startswith('X-Account-') or k.startswith('X-Container-') or
 805             k.startswith('X-Object-') or k.startswith('Content-')):
 806             del(response[k])
 807             response[quote(k)] = quote(v, safe='/=,:@; ')
 808
 809     if settings.TEST:
 810         response['Date'] = format_date_time(time())
 811
 812 def render_fault(request, fault):
 813     if isinstance(fault, InternalServerError) and (settings.DEBUG or settings.TEST):
 814         fault.details = format_exc(fault)
 815
 816     request.serialization = 'text'
 817     data = fault.message + '\n'
 818     if fault.details:
 819         data += '\n' + fault.details
 820     response = HttpResponse(data, status=fault.code)
 821     update_response_headers(request, response)
 822     return response
 823
 824 def request_serialization(request, format_allowed=False):
 825     """Return the serialization format requested.
 826
 827     Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
 828     """
 829
 830     if not format_allowed:
 831         return 'text'
 832
 833     format = request.GET.get('format')
 834     if format == 'json':
 835         return 'json'
 836     elif format == 'xml':
 837         return 'xml'
 838
 839     for item in request.META.get('HTTP_ACCEPT', '').split(','):
 840         accept, sep, rest = item.strip().partition(';')
 841         if accept == 'application/json':
 842             return 'json'
 843         elif accept == 'application/xml' or accept == 'text/xml':
 844             return 'xml'
 845
 846     return 'text'
 847
 848 def api_method(http_method=None, format_allowed=False, user_required=True):
 849     """Decorator function for views that implement an API method."""
 850
 851     def decorator(func):
 852         @wraps(func)
 853         def wrapper(request, *args, **kwargs):
 854             try:
 855                 if http_method and request.method != http_method:
 856                     raise BadRequest('Method not allowed.')
 857                 if user_required and getattr(request, 'user', None) is None:
 858                     raise Unauthorized('Access denied')
 859
 860                 # The args variable may contain up to (account, container, object).
 861                 if len(args) > 1 and len(args[1]) > 256:
 862                     raise BadRequest('Container name too large.')
 863                 if len(args) > 2 and len(args[2]) > 1024:
 864                     raise BadRequest('Object name too large.')
 865
 866                 # Format and check headers.
 867                 update_request_headers(request)
 868
 869                 # Fill in custom request variables.
 870                 request.serialization = request_serialization(request, format_allowed)
 871                 request.backend = get_backend()
 872
 873                 response = func(request, *args, **kwargs)
 874                 update_response_headers(request, response)
 875                 return response
 876             except Fault, fault:
 877                 return render_fault(request, fault)
 878             except BaseException, e:
 879                 logger.exception('Unexpected error: %s' % e)
 880                 fault = InternalServerError('Unexpected error')
 881                 return render_fault(request, fault)
 882             finally:
 883                 if getattr(request, 'backend', None) is not None:
 884                     request.backend.close()
 885         return wrapper
 886     return decorator