pithos / api / util.py @ 2bdc9dc4

# Copyright 2011 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
#   1. Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer.
#
#   2. Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.

from functools import wraps
from time import time
from traceback import format_exc
from wsgiref.handlers import format_date_time
from binascii import hexlify, unhexlify
from datetime import datetime, tzinfo, timedelta
from urllib import quote, unquote

from django.conf import settings
from django.http import HttpResponse
from django.utils import simplejson as json
from django.utils.http import http_date, parse_etags
from django.utils.encoding import smart_unicode, smart_str
from django.core.files.uploadhandler import FileUploadHandler
from django.core.files.uploadedfile import UploadedFile

from pithos.lib.compat import parse_http_date_safe, parse_http_date
from pithos.lib.hashmap import HashMap

from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
                               Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
                               RangeNotSatisfiable, ServiceUnavailable)
from pithos.api.short_url import encode_url
from pithos.backends import connect_backend
from pithos.backends.base import NotAllowedError, QuotaError

import logging
import re
import hashlib
import uuid
import decimal


logger = logging.getLogger(__name__)


class UTC(tzinfo):
    def utcoffset(self, dt):
        return timedelta(0)

    def tzname(self, dt):
        return 'UTC'

    def dst(self, dt):
        return timedelta(0)

def json_encode_decimal(obj):
    if isinstance(obj, decimal.Decimal):
        return str(obj)
    raise TypeError(repr(obj) + " is not JSON serializable")

def isoformat(d):
    """Return an ISO8601 date string that includes a timezone."""

    return d.replace(tzinfo=UTC()).isoformat()

def rename_meta_key(d, old, new):
    if old not in d:
        return
    d[new] = d[old]
    del(d[old])

def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.
    """

    if 'last_modified' in d:
        d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])

def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])

def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""

    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    # TODO: Document or remove '~' replacing.
    return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])

def get_account_headers(request):
    meta = get_header_prefix(request, 'X-Account-Meta-')
    groups = {}
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
        n = k[16:].lower()
        if '-' in n or '_' in n:
            raise BadRequest('Bad characters in group name')
        groups[n] = v.replace(' ', '').split(',')
        while '' in groups[n]:
            groups[n].remove('')
    return meta, groups

def put_account_headers(response, meta, groups, policy):
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in groups.iteritems():
        k = smart_str(k, strings_only=True)
        k = format_header_key('X-Account-Group-' + k)
        v = smart_str(','.join(v), strings_only=True)
        response[k] = v
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

def get_container_headers(request):
    meta = get_header_prefix(request, 'X-Container-Meta-')
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
    return meta, policy

def put_container_headers(request, response, meta, policy):
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

def get_object_headers(request):
    meta = get_header_prefix(request, 'X-Object-Meta-')
    if request.META.get('CONTENT_TYPE'):
        meta['Content-Type'] = request.META['CONTENT_TYPE']
    if request.META.get('HTTP_CONTENT_ENCODING'):
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
    return meta, get_sharing(request), get_public(request)

def put_object_headers(response, meta, restricted=False):
    response['ETag'] = meta['ETag'] if 'ETag' in meta else meta['hash']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if not restricted:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
                  'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                  'X-Object-Public'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)
    else:
        for k in ('Content-Encoding', 'Content-Disposition'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)

def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest."""

    if 'X-Object-Manifest' in meta:
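        # Aggregate the manifest: sum the sizes of all objects under the
        # source prefix and derive the ETag from their individual ETags below.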
        etag = ''
        bytes = 0
        try:
            src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
            objects = request.backend.list_objects(request.user_uniq, v_account,
                                src_container, prefix=src_name, virtual=False)
            for x in objects:
                src_meta = request.backend.get_object_meta(request.user_uniq,
                                        v_account, src_container, x[0], x[1])
                etag += src_meta['ETag']
                bytes += src_meta['bytes']
        except:
            # Ignore errors.
            return
        meta['bytes'] = bytes
        md5 = hashlib.md5()
        md5.update(etag)
        meta['ETag'] = md5.hexdigest().lower()

def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return
    ret = []
    r = ','.join(perms.get('read', []))
    if r:
        ret.append('read=' + r)
    w = ','.join(perms.get('write', []))
    if w:
        ret.append('write=' + w)
    meta['X-Object-Sharing'] = '; '.join(ret)
    if '/'.join((v_account, v_container, v_object)) != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed

def update_public_meta(public, meta):
    if not public:
        return
    meta['X-Object-Public'] = '/public/' + encode_url(public)

def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set."""

    if 'modified' not in meta:
        return # TODO: Always return?

    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if_modified_since = parse_http_date_safe(if_modified_since)
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
        raise NotModified('Resource has not been modified')

    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
        raise PreconditionFailed('Resource has been modified')

def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set."""

    etag = meta.get('ETag', None)

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
            raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        if etag is not None:
            if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
                # TODO: Continue if an If-Modified-Since header is present.
                if request.method in ('HEAD', 'GET'):
                    raise NotModified('Resource ETag matches')
                raise PreconditionFailed('Resource exists or ETag matches')

def split_container_object_string(s):
    if not len(s) > 0 or s[0] != '/':
        raise ValueError
    s = s[1:]
    pos = s.find('/')
    if pos == -1 or pos == len(s) - 1:
        raise ValueError
    return s[:pos], s[(pos + 1):]

def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object."""

    meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        meta, False, permissions)
        else:
            version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        meta, False, permissions, src_version)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except AttributeError, e:
        raise Conflict('\n'.join(e.data) + '\n')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')
    if public is not None:
        try:
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
        except NotAllowedError:
            raise Forbidden('Not allowed')
        except NameError:
            raise ItemNotFound('Object does not exist')
    return version_id

def get_int_parameter(p):
    if p is not None:
        try:
            p = int(p)
        except ValueError:
            return None
        if p < 0:
            return None
    return p

def get_content_length(request):
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if content_length is None:
        raise LengthRequired('Missing or invalid Content-Length header')
    return content_length

def get_range(request, size):
350
    """Parse a Range header from the request.
351
    
352
    Either returns None, when the header is not existent or should be ignored,
353
    or a list of (offset, length) tuples - should be further checked.
354
    """
355
    
356
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
357
    if not ranges.startswith('bytes='):
358
        return None
359
    
360
    ret = []
361
    for r in (x.strip() for x in ranges[6:].split(',')):
362
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
363
        m = p.match(r)
364
        if not m:
365
            return None
366
        offset = m.group('offset')
367
        upto = m.group('upto')
368
        if offset == '' and upto == '':
369
            return None
370
        
371
        if offset != '':
372
            offset = int(offset)
373
            if upto != '':
374
                upto = int(upto)
375
                if offset > upto:
376
                    return None
377
                ret.append((offset, upto - offset + 1))
378
            else:
379
                ret.append((offset, size - offset))
380
        else:
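            # Suffix range of the form '-N': serve the last N bytes.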
            length = int(upto)
            ret.append((size - length, length))

    return ret

def get_content_range(request):
    """Parse a Content-Range header from the request.

    Return None when the header is missing or should be ignored,
    or an (offset, length, total) tuple - check it, as length and total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None

    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
    m = p.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None
    offset = int(m.group('offset'))
    upto = m.group('upto')
    total = m.group('total')
    if upto != '':
        upto = int(upto)
    else:
        upto = None
    if total != '*':
        total = int(total)
    else:
        total = None
    if (upto is not None and offset > upto) or \
        (total is not None and offset >= total) or \
        (total is not None and upto is not None and upto >= total):
        return None

    if upto is None:
        length = None
    else:
        length = upto - offset + 1
    return (offset, length, total)

def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '')

    ret = {}
    permissions = permissions.replace(' ', '')
    if permissions == '':
        return ret
    for perm in (x for x in permissions.split(';')):
        if perm.startswith('read='):
            ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
            if '' in ret['read']:
                ret['read'].remove('')
            if '*' in ret['read']:
                ret['read'] = ['*']
            if len(ret['read']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        elif perm.startswith('write='):
            ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
            if '' in ret['write']:
                ret['write'].remove('')
            if '*' in ret['write']:
                ret['write'] = ['*']
            if len(ret['write']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        else:
            raise BadRequest('Bad X-Object-Sharing header value')

    # Keep duplicates only in write list.
    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret

def get_public(request):
    """Parse an X-Object-Public header from the request.

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    public = public.replace(' ', '').lower()
    if public == 'true':
        return True
    elif public == 'false' or public == '':
        return False
    raise BadRequest('Bad X-Object-Public header value')

def raw_input_socket(request):
    """Return the socket for reading the rest of the request."""

    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    if 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise ServiceUnavailable('Unknown server software')

MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB

def socket_read_iterator(request, length=0, blocksize=4096):
    """Return at most 'blocksize' bytes read from the socket in each iteration.

    Read up to 'length' bytes. If 'length' is negative, attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.
    """

    sock = raw_input_socket(request)
    if length < 0: # Chunked transfers
        # Small version (server does the dechunking).
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
            while length < MAX_UPLOAD_SIZE:
                data = sock.read(blocksize)
                if data == '':
                    return
                yield data
            raise BadRequest('Maximum size is reached')

        # Long version (do the dechunking).
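        # Each chunk arrives as '<hex-size>[;extensions]\r\n<data>\r\n';
        # a chunk of size zero marks the end of the body.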
        data = ''
        while length < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    chunk_length += sock.read(1)
                chunk_length = chunk_length.strip()
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length, 16)
            except Exception, e:
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                chunk_length -= len(chunk)
                if length > 0:
                    length += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        raise BadRequest('Maximum size is reached')
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data

class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the Django way."""

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.request.backend.block_size)
        return None

    def file_complete(self, file_size):
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file

class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        if self.length > 0:
            # Get the file for the current offset.
            file_size = self.sizes[self.file_index]
            while self.offset >= file_size:
                self.offset -= file_size
                self.file_index += 1
                file_size = self.sizes[self.file_index]

            # Get the block for the current position.
            self.block_index = int(self.offset / self.backend.block_size)
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
                try:
                    self.block = self.backend.get_block(self.block_hash)
                except NameError:
                    raise ItemNotFound('Block does not exist')

            # Get the data from the block.
            bo = self.offset % self.backend.block_size
            bl = min(self.length, len(self.block) - bo)
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration

    def next(self):
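        # A single range is streamed directly; multiple ranges are wrapped in
        # multipart/byteranges parts, each preceded by its own Content-Range
        # header and followed by a closing boundary at the end.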
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)

def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data."""

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        check = [True for offset, length in ranges if
                    length <= 0 or length > size or
                    offset < 0 or offset >= size or
                    offset + length > size]
        if len(check) > 0:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # If-Range contains a modification time instead of an ETag.
                last_modified = parse_http_date(if_range)
                if last_modified != meta['modified']:
                    ranges = [(0, size)]
                    ret = 200
            except ValueError:
                if if_range != meta['ETag']:
                    ranges = [(0, size)]
                    ret = 200

    if ret == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    else:
        boundary = ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response

def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset."""

    bi = int(offset / request.backend.block_size)
    bo = offset % request.backend.block_size
    bl = min(len(data), request.backend.block_size - bo)
    if bi < len(hashmap):
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
    else:
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
    return bl # Return amount of data written.

def hashmap_hash(request, hashmap):
    """Produce the root hash, treating the hashmap as a Merkle-like tree."""

    map = HashMap(request.backend.block_size, request.backend.hash_algorithm)
    map.extend([unhexlify(x) for x in hashmap])
    return hexlify(map.hash())

def update_request_headers(request):
    # Handle URL-encoded keys and values.
    meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for k, v in meta.iteritems():
        if len(k) > 128:
            raise BadRequest('Header name too large.')
        if len(v) > 256:
            raise BadRequest('Header value too large.')
        try:
            k.decode('ascii')
            v.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' in k or '%' in v:
            del(request.META[k])
            request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)

def update_response_headers(request, response):
    if request.serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif request.serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if (not response.has_header('Content-Length') and
        not (response.has_header('Content-Type') and
             response['Content-Type'].startswith('multipart/byteranges'))):
        response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    meta = response.items()
    for k, v in meta:
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
            k.startswith('X-Object-') or k.startswith('Content-')):
            del(response[k])
            response[quote(k)] = quote(v, safe='/=,:@; ')

    if settings.TEST:
        response['Date'] = format_date_time(time())

def render_fault(request, fault):
    if settings.DEBUG or settings.TEST:
        fault.details = format_exc(fault)

    request.serialization = 'text'
    data = '\n'.join((fault.message, fault.details)) + '\n'
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response

def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Valid formats are 'text' and 'json'; 'xml' is also valid if 'format_allowed' is True.
    """

    if not format_allowed:
        return 'text'

    format = request.GET.get('format')
    if format == 'json':
        return 'json'
    elif format == 'xml':
        return 'xml'

    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        accept, sep, rest = item.strip().partition(';')
        if accept == 'application/json':
            return 'json'
        elif accept == 'application/xml' or accept == 'text/xml':
            return 'xml'

    return 'text'

def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method."""

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = connect_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault, fault:
                return render_fault(request, fault)
            except BaseException, e:
                logger.exception('Unexpected error: %s' % e)
                fault = ServiceUnavailable('Unexpected error')
                return render_fault(request, fault)
            finally:
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator