root / pithos / api / util.py @ cddcf432
# Copyright 2011 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
#   1. Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer.
#
#   2. Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.

from functools import wraps
from time import time
from traceback import format_exc
from wsgiref.handlers import format_date_time
from binascii import hexlify, unhexlify
from datetime import datetime, tzinfo, timedelta
from urllib import quote, unquote

from django.conf import settings
from django.http import HttpResponse
from django.utils import simplejson as json
from django.utils.http import http_date, parse_etags
from django.utils.encoding import smart_unicode, smart_str
from django.core.files.uploadhandler import FileUploadHandler
from django.core.files.uploadedfile import UploadedFile

from pithos.lib.compat import parse_http_date_safe, parse_http_date
from pithos.lib.hashmap import HashMap

from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
                               Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
                               RangeNotSatisfiable, ServiceUnavailable)
from pithos.api.short_url import encode_url
from pithos.backends import connect_backend
from pithos.backends.base import NotAllowedError, QuotaError

import logging
import re
import hashlib
import uuid
import decimal


logger = logging.getLogger(__name__)


class UTC(tzinfo):
    def utcoffset(self, dt):
        return timedelta(0)

    def tzname(self, dt):
        return 'UTC'

    def dst(self, dt):
        return timedelta(0)

def json_encode_decimal(obj):
    if isinstance(obj, decimal.Decimal):
        return str(obj)
    raise TypeError(repr(obj) + " is not JSON serializable")

def isoformat(d):
    """Return an ISO8601 date string that includes a timezone."""

    return d.replace(tzinfo=UTC()).isoformat()

def rename_meta_key(d, old, new):
    if old not in d:
        return
    d[new] = d[old]
    del(d[old])

def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.
    """

    if 'last_modified' in d:
        d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])

def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])

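# NOTE (illustrative example, not part of the original source):
#   format_header_key('x_account_group_dev') -> 'X-Account-Group-Dev'
# Underscores become dashes and each dash-separated token is capitalized.
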
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""

    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    # TODO: Document or remove '~' replacing.
    return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])

def get_account_headers(request):
    meta = get_header_prefix(request, 'X-Account-Meta-')
    groups = {}
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
        n = k[16:].lower()
        if '-' in n or '_' in n:
            raise BadRequest('Bad characters in group name')
        groups[n] = v.replace(' ', '').split(',')
        while '' in groups[n]:
            groups[n].remove('')
    return meta, groups

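# NOTE (illustrative example, not part of the original source): a request
# carrying the header 'X-Account-Group-Dev: alice, bob' reaches Django as
# META['HTTP_X_ACCOUNT_GROUP_DEV'] = 'alice, bob'; get_account_headers()
# then yields groups == {'dev': ['alice', 'bob']} (spaces stripped).
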
def put_account_headers(response, meta, groups, policy):
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in groups.iteritems():
        k = smart_str(k, strings_only=True)
        k = format_header_key('X-Account-Group-' + k)
        v = smart_str(','.join(v), strings_only=True)
        response[k] = v
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

def get_container_headers(request):
    meta = get_header_prefix(request, 'X-Container-Meta-')
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
    return meta, policy

def put_container_headers(request, response, meta, policy):
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

def get_object_headers(request):
    meta = get_header_prefix(request, 'X-Object-Meta-')
    if request.META.get('CONTENT_TYPE'):
        meta['Content-Type'] = request.META['CONTENT_TYPE']
    if request.META.get('HTTP_CONTENT_ENCODING'):
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
    return meta, get_sharing(request), get_public(request)

def put_object_headers(response, meta, restricted=False):
    response['ETag'] = meta['ETag'] if 'ETag' in meta else meta['hash']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if not restricted:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
                  'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                  'X-Object-Public'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)
    else:
        for k in ('Content-Encoding', 'Content-Disposition'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)

def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest."""

    if 'X-Object-Manifest' in meta:
        etag = ''
        bytes = 0
        try:
            src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
            objects = request.backend.list_objects(request.user_uniq, v_account,
                                src_container, prefix=src_name, virtual=False)
            for x in objects:
                src_meta = request.backend.get_object_meta(request.user_uniq,
                                        v_account, src_container, x[0], x[1])
                etag += src_meta['ETag']
                bytes += src_meta['bytes']
        except:
            # Ignore errors.
            return
        meta['bytes'] = bytes
        md5 = hashlib.md5()
        md5.update(etag)
        meta['ETag'] = md5.hexdigest().lower()

def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return
    ret = []
    r = ','.join(perms.get('read', []))
    if r:
        ret.append('read=' + r)
    w = ','.join(perms.get('write', []))
    if w:
        ret.append('write=' + w)
    meta['X-Object-Sharing'] = '; '.join(ret)
    if '/'.join((v_account, v_container, v_object)) != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed

def update_public_meta(public, meta):
    if not public:
        return
    meta['X-Object-Public'] = '/public/' + encode_url(public)

def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set."""

    if 'modified' not in meta:
        return # TODO: Always return?

    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if_modified_since = parse_http_date_safe(if_modified_since)
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
        raise NotModified('Resource has not been modified')

    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
        raise PreconditionFailed('Resource has been modified')

def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set."""

    etag = meta.get('ETag', None)

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
            raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        if etag is not None:
            if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
                # TODO: Continue if an If-Modified-Since header is present.
                if request.method in ('HEAD', 'GET'):
                    raise NotModified('Resource ETag matches')
                raise PreconditionFailed('Resource exists or ETag matches')

def split_container_object_string(s):
    if not len(s) > 0 or s[0] != '/':
        raise ValueError
    s = s[1:]
    pos = s.find('/')
    if pos == -1 or pos == len(s) - 1:
        raise ValueError
    return s[:pos], s[(pos + 1):]

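# NOTE (illustrative example, not part of the original source):
#   split_container_object_string('/images/photos/me.jpg') -> ('images', 'photos/me.jpg')
# A missing leading slash or a missing object part raises ValueError.
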
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object."""

    meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
                                                     dest_account, dest_container, dest_name,
                                                     'pithos', meta, False, permissions)
        else:
            version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
                                                     dest_account, dest_container, dest_name,
                                                     'pithos', meta, False, permissions, src_version)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except AttributeError, e:
        raise Conflict('\n'.join(e.data) + '\n')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')
    if public is not None:
        try:
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
        except NotAllowedError:
            raise Forbidden('Not allowed')
        except NameError:
            raise ItemNotFound('Object does not exist')
    return version_id

def get_int_parameter(p):
    if p is not None:
        try:
            p = int(p)
        except ValueError:
            return None
        if p < 0:
            return None
    return p

def get_content_length(request):
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if content_length is None:
        raise LengthRequired('Missing or invalid Content-Length header')
    return content_length

def get_range(request, size):
    """Parse a Range header from the request.

    Return None when the header is absent or should be ignored,
    otherwise a list of (offset, length) tuples - these should be checked further.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
        m = p.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                ret.append((offset, upto - offset + 1))
            else:
                ret.append((offset, size - offset))
        else:
            length = int(upto)
            ret.append((size - length, length))

    return ret

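# NOTE (illustrative example, not part of the original source): for an object
# of size 10000, the header 'Range: bytes=0-499,1000-' parses to
# [(0, 500), (1000, 9000)], while 'Range: bytes=-500' parses to [(9500, 500)].
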
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Return None when the header is absent or should be ignored,
    otherwise an (offset, length, total) tuple - check further, as length and total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None

    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
    m = p.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None
    offset = int(m.group('offset'))
    upto = m.group('upto')
    total = m.group('total')
    if upto != '':
        upto = int(upto)
    else:
        upto = None
    if total != '*':
        total = int(total)
    else:
        total = None
    if (upto is not None and offset > upto) or \
        (total is not None and offset >= total) or \
        (total is not None and upto is not None and upto >= total):
        return None

    if upto is None:
        length = None
    else:
        length = upto - offset + 1
    return (offset, length, total)

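# NOTE (illustrative examples, not part of the original source):
#   'Content-Range: bytes 0-499/1234' -> (0, 500, 1234)
#   'Content-Range: bytes 500-/1234'  -> (500, None, 1234)
#   'Content-Range: bytes */*'        -> (None, None, None)
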
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '')

    ret = {}
    permissions = permissions.replace(' ', '')
    if permissions == '':
        return ret
    for perm in (x for x in permissions.split(';')):
        if perm.startswith('read='):
            ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
            if '' in ret['read']:
                ret['read'].remove('')
            if '*' in ret['read']:
                ret['read'] = ['*']
            if len(ret['read']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        elif perm.startswith('write='):
            ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
            if '' in ret['write']:
                ret['write'].remove('')
            if '*' in ret['write']:
                ret['write'] = ['*']
            if len(ret['write']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        else:
            raise BadRequest('Bad X-Object-Sharing header value')

    # Keep duplicates only in write list.
    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret

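# NOTE (illustrative example, not part of the original source):
#   'X-Object-Sharing: read=alice,bob;write=bob' parses to
#   {'read': ['alice'], 'write': ['bob']} - a user appearing in both lists is
#   kept only in the write list (see the duplicate handling above).
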
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    public = public.replace(' ', '').lower()
    if public == 'true':
        return True
    elif public == 'false' or public == '':
        return False
    raise BadRequest('Bad X-Object-Public header value')

def raw_input_socket(request):
    """Return the socket for reading the rest of the request."""

    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    if 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise ServiceUnavailable('Unknown server software')

MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB

def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.
    """

    sock = raw_input_socket(request)
    if length < 0: # Chunked transfers
        # Small version (server does the dechunking).
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
            while length < MAX_UPLOAD_SIZE:
                data = sock.read(blocksize)
                if data == '':
                    return
                yield data
            raise BadRequest('Maximum size is reached')

        # Long version (do the dechunking).
        data = ''
        while length < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    chunk_length += sock.read(1)
                chunk_length = chunk_length.strip()
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length, 16)
            except Exception, e:
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                chunk_length -= len(chunk)
                if length > 0:
                    length += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        raise BadRequest('Maximum size is reached')
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data

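# NOTE (illustrative, not part of the original source): the "long version"
# above undoes HTTP chunked transfer encoding, where the raw body looks like
#   4\r\nWiki\r\n6\r\npedia!\r\n0\r\n\r\n
# i.e. a hexadecimal chunk-size line, the chunk data, a trailing CRLF, and a
# final zero-size chunk marking the end of the body.
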
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way."""

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.request.backend.block_size)
        return None

    def file_complete(self, file_size):
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file

class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        if self.length > 0:
            # Get the file for the current offset.
            file_size = self.sizes[self.file_index]
            while self.offset >= file_size:
                self.offset -= file_size
                self.file_index += 1
                file_size = self.sizes[self.file_index]

            # Get the block for the current position.
            self.block_index = int(self.offset / self.backend.block_size)
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
                try:
                    self.block = self.backend.get_block(self.block_hash)
                except NameError:
                    raise ItemNotFound('Block does not exist')

            # Get the data from the block.
            bo = self.offset % self.backend.block_size
            bl = min(self.length, len(self.block) - bo)
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration

    def next(self):
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)

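# NOTE (illustrative, not part of the original source): when more than one
# range is requested, the wrapper above yields a multipart/byteranges body of
# roughly this shape (the data itself is yielded block-per-block between the
# part headers):
#   --<boundary>
#   Content-Range: bytes 0-9/1000
#   Content-Transfer-Encoding: binary
#
#   <first 10 bytes of data>
#   --<boundary>
#   Content-Range: bytes 100-109/1000
#   ...
#   --<boundary>--
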
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data."""

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        check = [True for offset, length in ranges if
                    length <= 0 or length > size or
                    offset < 0 or offset >= size or
                    offset + length > size]
        if len(check) > 0:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # A modification time was passed instead of an ETag.
                last_modified = parse_http_date(if_range)
                if last_modified != meta['modified']:
                    ranges = [(0, size)]
                    ret = 200
            except ValueError:
                if if_range != meta['ETag']:
                    ranges = [(0, size)]
                    ret = 200

    if ret == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    else:
        boundary = ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response

def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset."""

    bi = int(offset / request.backend.block_size)
    bo = offset % request.backend.block_size
    bl = min(len(data), request.backend.block_size - bo)
    if bi < len(hashmap):
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
    else:
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
    return bl # Return amount of data written.

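# NOTE (illustrative arithmetic, not part of the original source): with a
# 4096-byte block size, a write at offset 5000 targets block index bi = 1 at
# block offset bo = 904, and at most 4096 - 904 = 3192 bytes fit in that block;
# the return value lets the caller continue with the remaining data.
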
#def hashmap_hash(request, hashmap):
#    """Produce the root hash, treating the hashmap as a Merkle-like tree."""
#
#    map = HashMap(request.backend.block_size, request.backend.hash_algorithm)
#    map.extend([unhexlify(x) for x in hashmap])
#    return hexlify(map.hash())

def hashmap_md5(request, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap."""

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = request.backend.block_size
    for bi, hash in enumerate(hashmap):
        data = request.backend.get_block(hash)
        if bi == len(hashmap) - 1:
            bs = size % bs
        pad = bs - min(len(data), bs)
        md5.update(data + ('\x00' * pad))
    return md5.hexdigest().lower()

def update_request_headers(request):
    # Handle URL-encoded keys and values.
    meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for k, v in meta.iteritems():
        if len(k) > 128:
            raise BadRequest('Header name too large.')
        if len(v) > 256:
            raise BadRequest('Header value too large.')
        try:
            k.decode('ascii')
            v.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' in k or '%' in v:
            del(request.META[k])
            request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)

def update_response_headers(request, response):
    if request.serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif request.serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if (not response.has_header('Content-Length') and
        not (response.has_header('Content-Type') and
             response['Content-Type'].startswith('multipart/byteranges'))):
        response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    meta = response.items()
    for k, v in meta:
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
            k.startswith('X-Object-') or k.startswith('Content-')):
            del(response[k])
            response[quote(k)] = quote(v, safe='/=,:@; ')

    if settings.TEST:
        response['Date'] = format_date_time(time())

def render_fault(request, fault):
    if settings.DEBUG or settings.TEST:
        fault.details = format_exc(fault)

    request.serialization = 'text'
    data = '\n'.join((fault.message, fault.details)) + '\n'
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response

def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Valid formats are 'text' and 'json'; 'xml' is also valid if 'format_allowed' is True.
    """

    if not format_allowed:
        return 'text'

    format = request.GET.get('format')
    if format == 'json':
        return 'json'
    elif format == 'xml':
        return 'xml'

    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        accept, sep, rest = item.strip().partition(';')
        if accept == 'application/json':
            return 'json'
        elif accept == 'application/xml' or accept == 'text/xml':
            return 'xml'

    return 'text'

def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method."""

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = connect_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault, fault:
                return render_fault(request, fault)
            except BaseException, e:
                logger.exception('Unexpected error: %s' % e)
                fault = ServiceUnavailable('Unexpected error')
                return render_fault(request, fault)
            finally:
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator