code.grnet.gr Git - pithos/blob - pithos/api/util.py

   1 # Copyright 2011-2012 GRNET S.A. All rights reserved.
   2 #
   3 # Redistribution and use in source and binary forms, with or
   4 # without modification, are permitted provided that the following
   5 # conditions are met:
   6 #
   7 #   1. Redistributions of source code must retain the above
   8 #      copyright notice, this list of conditions and the following
   9 #      disclaimer.
  10 #
  11 #   2. Redistributions in binary form must reproduce the above
  12 #      copyright notice, this list of conditions and the following
  13 #      disclaimer in the documentation and/or other materials
  14 #      provided with the distribution.
  15 #
  16 # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
  17 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  19 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
  20 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  23 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  24 # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  26 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27 # POSSIBILITY OF SUCH DAMAGE.
  28 #
  29 # The views and conclusions contained in the software and
  30 # documentation are those of the authors and should not be
  31 # interpreted as representing official policies, either expressed
  32 # or implied, of GRNET S.A.
  33
  34 from functools import wraps
  35 from time import time
  36 from traceback import format_exc
  37 from wsgiref.handlers import format_date_time
  38 from binascii import hexlify, unhexlify
  39 from datetime import datetime, tzinfo, timedelta
  40 from urllib import quote, unquote
  41
  42 from django.conf import settings
  43 from django.http import HttpResponse
  44 from django.template.loader import render_to_string
  45 from django.utils import simplejson as json
  46 from django.utils.http import http_date, parse_etags
  47 from django.utils.encoding import smart_unicode, smart_str
  48 from django.core.files.uploadhandler import FileUploadHandler
  49 from django.core.files.uploadedfile import UploadedFile
  50
  51 from pithos.lib.compat import parse_http_date_safe, parse_http_date
  52
  53 from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
  54                                 Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
  55                                 RangeNotSatisfiable, InternalServerError, NotImplemented)
  56 from pithos.api.short_url import encode_url
  57 from pithos.backends import connect_backend
  58 from pithos.backends.base import NotAllowedError, QuotaError
  59
  60 import logging
  61 import re
  62 import hashlib
  63 import uuid
  64 import decimal
  65
  66
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
  68
  69
  70 class UTC(tzinfo):
  71    def utcoffset(self, dt):
  72        return timedelta(0)
  73
  74    def tzname(self, dt):
  75        return 'UTC'
  76
  77    def dst(self, dt):
  78        return timedelta(0)
  79
  80 def json_encode_decimal(obj):
  81     if isinstance(obj, decimal.Decimal):
  82         return str(obj)
  83     raise TypeError(repr(obj) + " is not JSON serializable")
  84
  85 def isoformat(d):
  86    """Return an ISO8601 date string that includes a timezone."""
  87
  88    return d.replace(tzinfo=UTC()).isoformat()
  89
  90 def rename_meta_key(d, old, new):
  91     if old not in d:
  92         return
  93     d[new] = d[old]
  94     del(d[old])
  95
  96 def printable_header_dict(d):
  97     """Format a meta dictionary for printing out json/xml.
  98
  99     Convert all keys to lower case and replace dashes with underscores.
 100     Format 'last_modified' timestamp.
 101     """
 102
 103     if 'last_modified' in d:
 104         d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
 105     return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
 106
 107 def format_header_key(k):
 108     """Convert underscores to dashes and capitalize intra-dash strings."""
 109     return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
 110
 111 def get_header_prefix(request, prefix):
 112     """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""
 113
 114     prefix = 'HTTP_' + prefix.upper().replace('-', '_')
 115     # TODO: Document or remove '~' replacing.
 116     return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])
 117
 118 def get_account_headers(request):
 119     meta = get_header_prefix(request, 'X-Account-Meta-')
 120     groups = {}
 121     for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
 122         n = k[16:].lower()
 123         if '-' in n or '_' in n:
 124             raise BadRequest('Bad characters in group name')
 125         groups[n] = v.replace(' ', '').split(',')
 126         while '' in groups[n]:
 127             groups[n].remove('')
 128     return meta, groups
 129
 130 def put_account_headers(response, meta, groups, policy):
 131     if 'count' in meta:
 132         response['X-Account-Container-Count'] = meta['count']
 133     if 'bytes' in meta:
 134         response['X-Account-Bytes-Used'] = meta['bytes']
 135     response['Last-Modified'] = http_date(int(meta['modified']))
 136     for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
 137         response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 138     if 'until_timestamp' in meta:
 139         response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
 140     for k, v in groups.iteritems():
 141         k = smart_str(k, strings_only=True)
 142         k = format_header_key('X-Account-Group-' + k)
 143         v = smart_str(','.join(v), strings_only=True)
 144         response[k] = v
 145     for k, v in policy.iteritems():
 146         response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
 147
 148 def get_container_headers(request):
 149     meta = get_header_prefix(request, 'X-Container-Meta-')
 150     policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
 151     return meta, policy
 152
 153 def put_container_headers(request, response, meta, policy):
 154     if 'count' in meta:
 155         response['X-Container-Object-Count'] = meta['count']
 156     if 'bytes' in meta:
 157         response['X-Container-Bytes-Used'] = meta['bytes']
 158     response['Last-Modified'] = http_date(int(meta['modified']))
 159     for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
 160         response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 161     l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
 162     response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
 163     response['X-Container-Block-Size'] = request.backend.block_size
 164     response['X-Container-Block-Hash'] = request.backend.hash_algorithm
 165     if 'until_timestamp' in meta:
 166         response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
 167     for k, v in policy.iteritems():
 168         response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
 169
 170 def get_object_headers(request):
 171     meta = get_header_prefix(request, 'X-Object-Meta-')
 172     if request.META.get('CONTENT_TYPE'):
 173         meta['Content-Type'] = request.META['CONTENT_TYPE']
 174     if request.META.get('HTTP_CONTENT_ENCODING'):
 175         meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
 176     if request.META.get('HTTP_CONTENT_DISPOSITION'):
 177         meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
 178     if request.META.get('HTTP_X_OBJECT_MANIFEST'):
 179         meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
 180     return meta, get_sharing(request), get_public(request)
 181
 182 def put_object_headers(response, meta, restricted=False):
 183     if 'ETag' in meta:
 184         response['ETag'] = meta['ETag']
 185     response['Content-Length'] = meta['bytes']
 186     response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
 187     response['Last-Modified'] = http_date(int(meta['modified']))
 188     if not restricted:
 189         response['X-Object-Hash'] = meta['hash']
 190         response['X-Object-UUID'] = meta['uuid']
 191         response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
 192         response['X-Object-Version'] = meta['version']
 193         response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
 194         for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
 195             response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
 196         for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
 197                   'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
 198                   'X-Object-Public'):
 199             if k in meta:
 200                 response[k] = smart_str(meta[k], strings_only=True)
 201     else:
 202         for k in ('Content-Encoding', 'Content-Disposition'):
 203             if k in meta:
 204                 response[k] = smart_str(meta[k], strings_only=True)
 205
 206 def update_manifest_meta(request, v_account, meta):
 207     """Update metadata if the object has an X-Object-Manifest."""
 208
 209     if 'X-Object-Manifest' in meta:
 210         etag = ''
 211         bytes = 0
 212         try:
 213             src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
 214             objects = request.backend.list_objects(request.user_uniq, v_account,
 215                                 src_container, prefix=src_name, virtual=False)
 216             for x in objects:
 217                 src_meta = request.backend.get_object_meta(request.user_uniq,
 218                                         v_account, src_container, x[0], 'pithos', x[1])
 219                 if 'ETag' in src_meta:
 220                     etag += src_meta['ETag']
 221                 bytes += src_meta['bytes']
 222         except:
 223             # Ignore errors.
 224             return
 225         meta['bytes'] = bytes
 226         md5 = hashlib.md5()
 227         md5.update(etag)
 228         meta['ETag'] = md5.hexdigest().lower()
 229
 230 def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
 231     if permissions is None:
 232         return
 233     allowed, perm_path, perms = permissions
 234     if len(perms) == 0:
 235         return
 236     ret = []
 237     r = ','.join(perms.get('read', []))
 238     if r:
 239         ret.append('read=' + r)
 240     w = ','.join(perms.get('write', []))
 241     if w:
 242         ret.append('write=' + w)
 243     meta['X-Object-Sharing'] = '; '.join(ret)
 244     if '/'.join((v_account, v_container, v_object)) != perm_path:
 245         meta['X-Object-Shared-By'] = perm_path
 246     if request.user_uniq != v_account:
 247         meta['X-Object-Allowed-To'] = allowed
 248
 249 def update_public_meta(public, meta):
 250     if not public:
 251         return
 252     meta['X-Object-Public'] = '/public/' + encode_url(public)
 253
 254 def validate_modification_preconditions(request, meta):
 255     """Check that the modified timestamp conforms with the preconditions set."""
 256
 257     if 'modified' not in meta:
 258         return # TODO: Always return?
 259
 260     if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
 261     if if_modified_since is not None:
 262         if_modified_since = parse_http_date_safe(if_modified_since)
 263     if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
 264         raise NotModified('Resource has not been modified')
 265
 266     if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
 267     if if_unmodified_since is not None:
 268         if_unmodified_since = parse_http_date_safe(if_unmodified_since)
 269     if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
 270         raise PreconditionFailed('Resource has been modified')
 271
 272 def validate_matching_preconditions(request, meta):
 273     """Check that the ETag conforms with the preconditions set."""
 274
 275     etag = meta.get('ETag', None)
 276
 277     if_match = request.META.get('HTTP_IF_MATCH')
 278     if if_match is not None:
 279         if etag is None:
 280             raise PreconditionFailed('Resource does not exist')
 281         if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
 282             raise PreconditionFailed('Resource ETag does not match')
 283
 284     if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
 285     if if_none_match is not None:
 286         # TODO: If this passes, must ignore If-Modified-Since header.
 287         if etag is not None:
 288             if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
 289                 # TODO: Continue if an If-Modified-Since header is present.
 290                 if request.method in ('HEAD', 'GET'):
 291                     raise NotModified('Resource ETag matches')
 292                 raise PreconditionFailed('Resource exists or ETag matches')
 293
 294 def split_container_object_string(s):
 295     if not len(s) > 0 or s[0] != '/':
 296         raise ValueError
 297     s = s[1:]
 298     pos = s.find('/')
 299     if pos == -1 or pos == len(s) - 1:
 300         raise ValueError
 301     return s[:pos], s[(pos + 1):]
 302
 303 def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
 304     """Copy or move an object."""
 305
 306     if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
 307         del(request.META['CONTENT_TYPE'])
 308     meta, permissions, public = get_object_headers(request)
 309     src_version = request.META.get('HTTP_X_SOURCE_VERSION')
 310     try:
 311         if move:
 312             version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
 313                                                         dest_account, dest_container, dest_name,
 314                                                         'pithos', meta, False, permissions)
 315         else:
 316             version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
 317                                                         dest_account, dest_container, dest_name,
 318                                                         'pithos', meta, False, permissions, src_version)
 319     except NotAllowedError:
 320         raise Forbidden('Not allowed')
 321     except (NameError, IndexError):
 322         raise ItemNotFound('Container or object does not exist')
 323     except ValueError:
 324         raise BadRequest('Invalid sharing header')
 325     except AttributeError, e:
 326         raise Conflict(simple_list_response(request, e.data))
 327     except QuotaError:
 328         raise RequestEntityTooLarge('Quota exceeded')
 329     if public is not None:
 330         try:
 331             request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
 332         except NotAllowedError:
 333             raise Forbidden('Not allowed')
 334         except NameError:
 335             raise ItemNotFound('Object does not exist')
 336     return version_id
 337
 338 def get_int_parameter(p):
 339     if p is not None:
 340         try:
 341             p = int(p)
 342         except ValueError:
 343             return None
 344         if p < 0:
 345             return None
 346     return p
 347
 348 def get_content_length(request):
 349     content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
 350     if content_length is None:
 351         raise LengthRequired('Missing or invalid Content-Length header')
 352     return content_length
 353
 354 def get_range(request, size):
 355     """Parse a Range header from the request.
 356
 357     Either returns None, when the header is not existent or should be ignored,
 358     or a list of (offset, length) tuples - should be further checked.
 359     """
 360
 361     ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
 362     if not ranges.startswith('bytes='):
 363         return None
 364
 365     ret = []
 366     for r in (x.strip() for x in ranges[6:].split(',')):
 367         p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
 368         m = p.match(r)
 369         if not m:
 370             return None
 371         offset = m.group('offset')
 372         upto = m.group('upto')
 373         if offset == '' and upto == '':
 374             return None
 375
 376         if offset != '':
 377             offset = int(offset)
 378             if upto != '':
 379                 upto = int(upto)
 380                 if offset > upto:
 381                     return None
 382                 ret.append((offset, upto - offset + 1))
 383             else:
 384                 ret.append((offset, size - offset))
 385         else:
 386             length = int(upto)
 387             ret.append((size - length, length))
 388
 389     return ret
 390
 391 def get_content_range(request):
 392     """Parse a Content-Range header from the request.
 393
 394     Either returns None, when the header is not existent or should be ignored,
 395     or an (offset, length, total) tuple - check as length, total may be None.
 396     Returns (None, None, None) if the provided range is '*/*'.
 397     """
 398
 399     ranges = request.META.get('HTTP_CONTENT_RANGE', '')
 400     if not ranges:
 401         return None
 402
 403     p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
 404     m = p.match(ranges)
 405     if not m:
 406         if ranges == 'bytes */*':
 407             return (None, None, None)
 408         return None
 409     offset = int(m.group('offset'))
 410     upto = m.group('upto')
 411     total = m.group('total')
 412     if upto != '':
 413         upto = int(upto)
 414     else:
 415         upto = None
 416     if total != '*':
 417         total = int(total)
 418     else:
 419         total = None
 420     if (upto is not None and offset > upto) or \
 421         (total is not None and offset >= total) or \
 422         (total is not None and upto is not None and upto >= total):
 423         return None
 424
 425     if upto is None:
 426         length = None
 427     else:
 428         length = upto - offset + 1
 429     return (offset, length, total)
 430
 431 def get_sharing(request):
 432     """Parse an X-Object-Sharing header from the request.
 433
 434     Raises BadRequest on error.
 435     """
 436
 437     permissions = request.META.get('HTTP_X_OBJECT_SHARING')
 438     if permissions is None:
 439         return None
 440
 441     # TODO: Document or remove '~' replacing.
 442     permissions = permissions.replace('~', '')
 443
 444     ret = {}
 445     permissions = permissions.replace(' ', '')
 446     if permissions == '':
 447         return ret
 448     for perm in (x for x in permissions.split(';')):
 449         if perm.startswith('read='):
 450             ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
 451             if '' in ret['read']:
 452                 ret['read'].remove('')
 453             if '*' in ret['read']:
 454                 ret['read'] = ['*']
 455             if len(ret['read']) == 0:
 456                 raise BadRequest('Bad X-Object-Sharing header value')
 457         elif perm.startswith('write='):
 458             ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
 459             if '' in ret['write']:
 460                 ret['write'].remove('')
 461             if '*' in ret['write']:
 462                 ret['write'] = ['*']
 463             if len(ret['write']) == 0:
 464                 raise BadRequest('Bad X-Object-Sharing header value')
 465         else:
 466             raise BadRequest('Bad X-Object-Sharing header value')
 467
 468     # Keep duplicates only in write list.
 469     dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
 470     if dups:
 471         for x in dups:
 472             ret['read'].remove(x)
 473         if len(ret['read']) == 0:
 474             del(ret['read'])
 475
 476     return ret
 477
 478 def get_public(request):
 479     """Parse an X-Object-Public header from the request.
 480
 481     Raises BadRequest on error.
 482     """
 483
 484     public = request.META.get('HTTP_X_OBJECT_PUBLIC')
 485     if public is None:
 486         return None
 487
 488     public = public.replace(' ', '').lower()
 489     if public == 'true':
 490         return True
 491     elif public == 'false' or public == '':
 492         return False
 493     raise BadRequest('Bad X-Object-Public header value')
 494
 495 def raw_input_socket(request):
 496     """Return the socket for reading the rest of the request."""
 497
 498     server_software = request.META.get('SERVER_SOFTWARE')
 499     if server_software and server_software.startswith('mod_python'):
 500         return request._req
 501     if 'wsgi.input' in request.environ:
 502         return request.environ['wsgi.input']
 503     raise NotImplemented('Unknown server software')
 504
# Upper limit, in bytes, that socket_read_iterator() checks uploads against.
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB
 506
 507 def socket_read_iterator(request, length=0, blocksize=4096):
 508     """Return a maximum of blocksize data read from the socket in each iteration.
 509
 510     Read up to 'length'. If 'length' is negative, will attempt a chunked read.
 511     The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
 512     """
 513
 514     sock = raw_input_socket(request)
 515     if length < 0: # Chunked transfers
 516         # Small version (server does the dechunking).
 517         if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
 518             while length < MAX_UPLOAD_SIZE:
 519                 data = sock.read(blocksize)
 520                 if data == '':
 521                     return
 522                 yield data
 523             raise BadRequest('Maximum size is reached')
 524
 525         # Long version (do the dechunking).
 526         data = ''
 527         while length < MAX_UPLOAD_SIZE:
 528             # Get chunk size.
 529             if hasattr(sock, 'readline'):
 530                 chunk_length = sock.readline()
 531             else:
 532                 chunk_length = ''
 533                 while chunk_length[-1:] != '\n':
 534                     chunk_length += sock.read(1)
 535                 chunk_length.strip()
 536             pos = chunk_length.find(';')
 537             if pos >= 0:
 538                 chunk_length = chunk_length[:pos]
 539             try:
 540                 chunk_length = int(chunk_length, 16)
 541             except Exception, e:
 542                 raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
 543             # Check if done.
 544             if chunk_length == 0:
 545                 if len(data) > 0:
 546                     yield data
 547                 return
 548             # Get the actual data.
 549             while chunk_length > 0:
 550                 chunk = sock.read(min(chunk_length, blocksize))
 551                 chunk_length -= len(chunk)
 552                 if length > 0:
 553                     length += len(chunk)
 554                 data += chunk
 555                 if len(data) >= blocksize:
 556                     ret = data[:blocksize]
 557                     data = data[blocksize:]
 558                     yield ret
 559             sock.read(2) # CRLF
 560         raise BadRequest('Maximum size is reached')
 561     else:
 562         if length > MAX_UPLOAD_SIZE:
 563             raise BadRequest('Maximum size is reached')
 564         while length > 0:
 565             data = sock.read(min(length, blocksize))
 566             if not data:
 567                 raise BadRequest()
 568             length -= len(data)
 569             yield data
 570
 571 class SaveToBackendHandler(FileUploadHandler):
 572     """Handle a file from an HTML form the django way."""
 573
 574     def __init__(self, request=None):
 575         super(SaveToBackendHandler, self).__init__(request)
 576         self.backend = request.backend
 577
 578     def put_data(self, length):
 579         if len(self.data) >= length:
 580             block = self.data[:length]
 581             self.file.hashmap.append(self.backend.put_block(block))
 582             self.md5.update(block)
 583             self.data = self.data[length:]
 584
 585     def new_file(self, field_name, file_name, content_type, content_length, charset=None):
 586         self.md5 = hashlib.md5()
 587         self.data = ''
 588         self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
 589         self.file.size = 0
 590         self.file.hashmap = []
 591
 592     def receive_data_chunk(self, raw_data, start):
 593         self.data += raw_data
 594         self.file.size += len(raw_data)
 595         self.put_data(self.request.backend.block_size)
 596         return None
 597
 598     def file_complete(self, file_size):
 599         l = len(self.data)
 600         if l > 0:
 601             self.put_data(l)
 602         self.file.etag = self.md5.hexdigest().lower()
 603         return self.file
 604
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.

    With a single range only the data is produced. With more than one
    range, each range's data is preceded by a part header (boundary line,
    Content-Range and Content-Transfer-Encoding) and a closing boundary
    line is produced after the last range.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        """Keep the arguments and position the reader on the first range.

        ``ranges`` is a list of (offset, length) tuples. ``sizes`` and
        ``hashmaps`` are indexed in parallel by file; ``self.size`` is the
        sum of ``sizes``. ``boundary`` is the string used on the part
        separator lines.
        """
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        # Position inside the data: current file, current block within it,
        # and the last block fetched (kept so that it is not fetched again
        # while reading continues inside the same block).
        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        # -1 means that no part header has been produced yet.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of data of the current range.

        A piece never extends past the end of the current block. Raises
        StopIteration when the current range has no data left, and
        ItemNotFound when the backend signals a missing block with
        NameError.
        """
        if self.length > 0:
            # Get the file for the current offset.
            file_size = self.sizes[self.file_index]
            while self.offset >= file_size:
                # Make the offset relative to the next file.
                self.offset -= file_size
                self.file_index += 1
                file_size = self.sizes[self.file_index]

            # Get the block for the current position.
            self.block_index = int(self.offset / self.backend.block_size)
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
                try:
                    self.block = self.backend.get_block(self.block_hash)
                except NameError:
                    raise ItemNotFound('Block does not exist')

            # Get the data from the block.
            bo = self.offset % self.backend.block_size  # offset inside the block
            bl = min(self.length, len(self.block) - bo)  # bytes to take from it
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration

    def next(self):
        """Return the next string of the response body.

        Raises StopIteration when there is nothing more to produce.
        """
        if len(self.ranges) == 1:
            # Single range: plain data, no multipart framing.
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            # The footer has already been produced.
            raise StopIteration
        try:
            if self.range_index == -1:
                # Nothing produced yet: go straight to the first part header.
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            # The current range is exhausted (or this is the first call):
            # move on to the next range, or finish with the footer.
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    # Line break after the previous part's data.
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)
 688
 689 def object_data_response(request, sizes, hashmaps, meta, public=False):
 690     """Get the HttpResponse object for replying with the object's data."""
 691
 692     # Range handling.
 693     size = sum(sizes)
 694     ranges = get_range(request, size)
 695     if ranges is None:
 696         ranges = [(0, size)]
 697         ret = 200
 698     else:
 699         check = [True for offset, length in ranges if
 700                     length <= 0 or length > size or
 701                     offset < 0 or offset >= size or
 702                     offset + length > size]
 703         if len(check) > 0:
 704             raise RangeNotSatisfiable('Requested range exceeds object limits')
 705         ret = 206
 706         if_range = request.META.get('HTTP_IF_RANGE')
 707         if if_range:
 708             try:
 709                 # Modification time has passed instead.
 710                 last_modified = parse_http_date(if_range)
 711                 if last_modified != meta['modified']:
 712                     ranges = [(0, size)]
 713                     ret = 200
 714             except ValueError:
 715                 if if_range != meta['ETag']:
 716                     ranges = [(0, size)]
 717                     ret = 200
 718
 719     if ret == 206 and len(ranges) > 1:
 720         boundary = uuid.uuid4().hex
 721     else:
 722         boundary = ''
 723     wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
 724     response = HttpResponse(wrapper, status=ret)
 725     put_object_headers(response, meta, public)
 726     if ret == 206:
 727         if len(ranges) == 1:
 728             offset, length = ranges[0]
 729             response['Content-Length'] = length # Update with the correct length.
 730             response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
 731         else:
 732             del(response['Content-Length'])
 733             response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
 734     return response
 735
 736 def put_object_block(request, hashmap, data, offset):
 737     """Put one block of data at the given offset."""
 738
 739     bi = int(offset / request.backend.block_size)
 740     bo = offset % request.backend.block_size
 741     bl = min(len(data), request.backend.block_size - bo)
 742     if bi < len(hashmap):
 743         hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
 744     else:
 745         hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
 746     return bl # Return ammount of data written.
 747
 748 def hashmap_md5(request, hashmap, size):
 749     """Produce the MD5 sum from the data in the hashmap."""
 750
 751     # TODO: Search backend for the MD5 of another object with the same hashmap and size...
 752     md5 = hashlib.md5()
 753     bs = request.backend.block_size
 754     for bi, hash in enumerate(hashmap):
 755         data = request.backend.get_block(hash)
 756         if bi == len(hashmap) - 1:
 757             bs = size % bs
 758         pad = bs - min(len(data), bs)
 759         md5.update(data + ('\x00' * pad))
 760     return md5.hexdigest().lower()
 761
 762 def simple_list_response(request, l):
 763     if request.serialization == 'text':
 764         return '\n'.join(l) + '\n'
 765     if request.serialization == 'xml':
 766         return render_to_string('items.xml', {'items': l})
 767     if request.serialization == 'json':
 768         return json.dumps(l)
 769
 770 def get_backend():
 771     backend = connect_backend(db_module=settings.BACKEND_DB_MODULE,
 772                               db_connection=settings.BACKEND_DB_CONNECTION,
 773                               block_module=settings.BACKEND_BLOCK_MODULE,
 774                               block_path=settings.BACKEND_BLOCK_PATH,
 775                               queue_module=settings.BACKEND_QUEUE_MODULE,
 776                               queue_connection=settings.BACKEND_QUEUE_CONNECTION)
 777     backend.default_policy['quota'] = settings.BACKEND_QUOTA
 778     backend.default_policy['versioning'] = settings.BACKEND_VERSIONING
 779     return backend
 780
 781 def update_request_headers(request):
 782     # Handle URL-encoded keys and values.
 783     # Handle URL-encoded keys and values.
 784     meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
 785     if len(meta) > 90:
 786         raise BadRequest('Too many headers.')
 787     for k, v in meta.iteritems():
 788         if len(k) > 128:
 789             raise BadRequest('Header name too large.')
 790         if len(v) > 256:
 791             raise BadRequest('Header value too large.')
 792         try:
 793             k.decode('ascii')
 794             v.decode('ascii')
 795         except UnicodeDecodeError:
 796             raise BadRequest('Bad character in headers.')
 797         if '%' in k or '%' in v:
 798             del(request.META[k])
 799             request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)
 800
 801 def update_response_headers(request, response):
 802     if request.serialization == 'xml':
 803         response['Content-Type'] = 'application/xml; charset=UTF-8'
 804     elif request.serialization == 'json':
 805         response['Content-Type'] = 'application/json; charset=UTF-8'
 806     elif not response['Content-Type']:
 807         response['Content-Type'] = 'text/plain; charset=UTF-8'
 808
 809     if (not response.has_header('Content-Length') and
 810         not (response.has_header('Content-Type') and
 811              response['Content-Type'].startswith('multipart/byteranges'))):
 812         response['Content-Length'] = len(response.content)
 813
 814     # URL-encode unicode in headers.
 815     meta = response.items()
 816     for k, v in meta:
 817         if (k.startswith('X-Account-') or k.startswith('X-Container-') or
 818             k.startswith('X-Object-') or k.startswith('Content-')):
 819             del(response[k])
 820             response[quote(k)] = quote(v, safe='/=,:@; ')
 821
 822     if settings.TEST:
 823         response['Date'] = format_date_time(time())
 824
 825 def render_fault(request, fault):
 826     if isinstance(fault, InternalServerError) and (settings.DEBUG or settings.TEST):
 827         fault.details = format_exc(fault)
 828
 829     request.serialization = 'text'
 830     data = fault.message + '\n'
 831     if fault.details:
 832         data += '\n' + fault.details
 833     response = HttpResponse(data, status=fault.code)
 834     update_response_headers(request, response)
 835     return response
 836
 837 def request_serialization(request, format_allowed=False):
 838     """Return the serialization format requested.
 839
 840     Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
 841     """
 842
 843     if not format_allowed:
 844         return 'text'
 845
 846     format = request.GET.get('format')
 847     if format == 'json':
 848         return 'json'
 849     elif format == 'xml':
 850         return 'xml'
 851
 852     for item in request.META.get('HTTP_ACCEPT', '').split(','):
 853         accept, sep, rest = item.strip().partition(';')
 854         if accept == 'application/json':
 855             return 'json'
 856         elif accept == 'application/xml' or accept == 'text/xml':
 857             return 'xml'
 858
 859     return 'text'
 860
 861 def api_method(http_method=None, format_allowed=False, user_required=True):
 862     """Decorator function for views that implement an API method."""
 863
 864     def decorator(func):
 865         @wraps(func)
 866         def wrapper(request, *args, **kwargs):
 867             try:
 868                 if http_method and request.method != http_method:
 869                     raise BadRequest('Method not allowed.')
 870                 if user_required and getattr(request, 'user', None) is None:
 871                     raise Unauthorized('Access denied')
 872
 873                 # The args variable may contain up to (account, container, object).
 874                 if len(args) > 1 and len(args[1]) > 256:
 875                     raise BadRequest('Container name too large.')
 876                 if len(args) > 2 and len(args[2]) > 1024:
 877                     raise BadRequest('Object name too large.')
 878
 879                 # Format and check headers.
 880                 update_request_headers(request)
 881
 882                 # Fill in custom request variables.
 883                 request.serialization = request_serialization(request, format_allowed)
 884                 request.backend = get_backend()
 885
 886                 response = func(request, *args, **kwargs)
 887                 update_response_headers(request, response)
 888                 return response
 889             except Fault, fault:
 890                 return render_fault(request, fault)
 891             except BaseException, e:
 892                 logger.exception('Unexpected error: %s' % e)
 893                 fault = InternalServerError('Unexpected error')
 894                 return render_fault(request, fault)
 895             finally:
 896                 if getattr(request, 'backend', None) is not None:
 897                     request.backend.close()
 898         return wrapper
 899     return decorator