Statistics
| Branch: | Tag: | Revision:

root / pithos / api / util.py @ 9fefc052

History | View | Annotate | Download (33.7 kB)

1
# Copyright 2011 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.utils import simplejson as json
45
from django.utils.http import http_date, parse_etags
46
from django.utils.encoding import smart_str
47
from django.core.files.uploadhandler import FileUploadHandler
48
from django.core.files.uploadedfile import UploadedFile
49

    
50
from pithos.api.compat import parse_http_date_safe, parse_http_date
51
from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
52
                                Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
53
                                RangeNotSatisfiable, ServiceUnavailable)
54
from pithos.api.short_url import encode_url
55
from pithos.backends import connect_backend
56
from pithos.backends.base import NotAllowedError, QuotaError
57

    
58
import logging
59
import re
60
import hashlib
61
import uuid
62
import decimal
63

    
64

    
65
# Module-level logger for the Pithos API utility helpers.
logger = logging.getLogger(__name__)
66

    
67

    
68
class UTC(tzinfo):
    """Concrete tzinfo implementation representing Coordinated Universal Time."""
    
    def utcoffset(self, dt):
        """UTC is, by definition, at zero offset."""
        return timedelta(0)
    
    def tzname(self, dt):
        """Canonical name of the zone."""
        return 'UTC'
    
    def dst(self, dt):
        """UTC never observes daylight saving time."""
        return timedelta(0)
77

    
78
def json_encode_decimal(obj):
    """JSON 'default' hook: serialize Decimal values as strings.
    
    Raises TypeError for anything that is not a Decimal, as the json
    module expects from a default handler.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
82

    
83
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone."""
    
    aware = d.replace(tzinfo=UTC())
    return aware.isoformat()
87

    
88
def rename_meta_key(d, old, new):
    """Move the value stored under key 'old' to key 'new' in dict 'd'.
    
    No-op when 'old' is absent. Uses dict.pop so that the degenerate
    old == new case keeps the value (the previous assign-then-delete
    sequence would have deleted it).
    """
    if old in d:
        d[new] = d.pop(old)
93

    
94
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.
    
    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.
    """
    
    d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
    return dict((key.lower().replace('-', '_'), value) for key, value in d.iteritems())
103

    
104
def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    normalized = k.replace('_', '-')
    return '-'.join(part.capitalize() for part in normalized.split('-'))
107

    
108
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""
    
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    # TODO: Document or remove '~' replacing.
    headers = {}
    for key, value in request.META.iteritems():
        if key.startswith(prefix) and len(key) > len(prefix):
            # Strip the leading 'HTTP_' before reformatting the key.
            headers[format_header_key(key[5:])] = value.replace('~', '')
    return headers
114

    
115
def get_account_headers(request):
    """Collect account meta and group headers from the request.
    
    Returns a (meta, groups) tuple; each group value is a list of
    members with blanks removed. Raises BadRequest for group names
    containing '-' or '_'.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    groups = {}
    for key, value in get_header_prefix(request, 'X-Account-Group-').iteritems():
        name = key[16:].lower()
        if '-' in name or '_' in name:
            raise BadRequest('Bad characters in group name')
        members = [m for m in value.replace(' ', '').split(',') if m != '']
        groups[name] = members
    return meta, groups
126

    
127
def put_account_headers(response, meta, groups, policy):
    """Fill an HTTP response with account-level headers from backend meta."""
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for key in meta.keys():
        if key.startswith('X-Account-Meta-'):
            response[smart_str(key, strings_only=True)] = smart_str(meta[key], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for name, members in groups.iteritems():
        header = format_header_key('X-Account-Group-' + smart_str(name, strings_only=True))
        response[header] = smart_str(','.join(members), strings_only=True)
    for name, value in policy.iteritems():
        header = smart_str(format_header_key('X-Account-Policy-' + name), strings_only=True)
        response[header] = smart_str(value, strings_only=True)
144

    
145
def get_container_headers(request):
    """Collect container meta and policy headers from the request."""
    meta = get_header_prefix(request, 'X-Container-Meta-')
    policy = {}
    for key, value in get_header_prefix(request, 'X-Container-Policy-').iteritems():
        # Strip the 'X-Container-Policy-' prefix (19 chars) from the key.
        policy[key[19:].lower()] = value.replace(' ', '')
    return meta, policy
149

    
150
def put_container_headers(request, response, meta, policy):
    """Fill an HTTP response with container-level headers from backend meta."""
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for key in meta.keys():
        if key.startswith('X-Container-Meta-'):
            response[smart_str(key, strings_only=True)] = smart_str(meta[key], strings_only=True)
    # Advertise the distinct object meta keys present in this container.
    object_meta = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(x[14:] for x in object_meta)
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for name, value in policy.iteritems():
        response[smart_str(format_header_key('X-Container-Policy-' + name), strings_only=True)] = smart_str(value, strings_only=True)
166

    
167
def get_object_headers(request):
    """Collect object meta from the request headers.
    
    Returns a (meta, sharing permissions, public flag) tuple.
    """
    meta = get_header_prefix(request, 'X-Object-Meta-')
    passthrough = (('CONTENT_TYPE', 'Content-Type'),
                   ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
                   ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
                   ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'))
    for env_key, header in passthrough:
        if request.META.get(env_key):
            meta[header] = request.META[env_key]
    return meta, get_sharing(request), get_public(request)
178

    
179
def put_object_headers(response, meta, restricted=False):
    """Fill an HTTP response with object-level headers.
    
    When 'restricted' is true (e.g. anonymous/public access), only a
    minimal subset of the metadata is exposed.
    """
    response['ETag'] = meta['ETag']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if restricted:
        for k in ('Content-Encoding', 'Content-Disposition'):
            if k in meta:
                response[k] = meta[k]
        return
    response['X-Object-Hash'] = meta['hash']
    response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
    response['X-Object-Version'] = meta['version']
    response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
    for k in meta.keys():
        if k.startswith('X-Object-Meta-'):
            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
              'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
              'X-Object-Public'):
        if k in meta:
            response[k] = smart_str(meta[k], strings_only=True)
200

    
201
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.
    
    For manifest objects, aggregates the total size and a combined ETag
    (md5 over the concatenated part ETags) of all objects under the
    manifest prefix.
    """
    
    if 'X-Object-Manifest' not in meta:
        return
    etag = ''
    bytes = 0
    try:
        src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(request.user_uniq, v_account,
                            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(request.user_uniq,
                                    v_account, src_container, x[0], x[1])
            etag += src_meta['ETag']
            bytes += src_meta['bytes']
    except Exception:
        # Was a bare 'except:'; narrowed so SystemExit/KeyboardInterrupt are
        # no longer swallowed. Backend/lookup errors are deliberately ignored
        # and leave the meta untouched.
        return
    meta['bytes'] = bytes
    md5 = hashlib.md5()
    md5.update(etag)
    meta['ETag'] = md5.hexdigest().lower()
223

    
224
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add X-Object-Sharing/Shared-By/Allowed-To entries to 'meta'.
    
    'permissions' is an (allowed, perm_path, perms) tuple or None.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if not perms:
        return
    parts = []
    readers = ','.join(perms.get('read', []))
    if readers:
        parts.append('read=' + readers)
    writers = ','.join(perms.get('write', []))
    if writers:
        parts.append('write=' + writers)
    meta['X-Object-Sharing'] = '; '.join(parts)
    # Permissions may be inherited from a parent path.
    if perm_path != '/'.join((v_account, v_container, v_object)):
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
242

    
243
def update_public_meta(public, meta):
    """Expose the short public URL in 'meta' when the object is public."""
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
247

    
248
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.
    
    Raises NotModified / PreconditionFailed accordingly.
    """
    
    if 'modified' not in meta:
        return # TODO: Always return?
    
    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if_modified_since = parse_http_date_safe(if_modified_since)
        # Unparsable dates come back as None and are ignored.
        if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
            raise NotModified('Resource has not been modified')
    
    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
        if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
            raise PreconditionFailed('Resource has been modified')
265

    
266
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.
    
    Raises NotModified / PreconditionFailed accordingly.
    """
    
    etag = meta.get('ETag', None)
    
    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            candidates = [x.lower() for x in parse_etags(if_match)]
            if etag not in candidates:
                raise PreconditionFailed('Resource ETag does not match')
    
    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None and etag is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        matches = (if_none_match == '*' or
                   etag in [x.lower() for x in parse_etags(if_none_match)])
        if matches:
            # TODO: Continue if an If-Modified-Since header is present.
            if request.method in ('HEAD', 'GET'):
                raise NotModified('Resource ETag matches')
            raise PreconditionFailed('Resource exists or ETag matches')
287

    
288
def split_container_object_string(s):
    """Split a '/container/object' string into (container, object).
    
    Raises ValueError when the leading slash, the separator, or the
    object part is missing.
    """
    if not s.startswith('/'):
        raise ValueError
    container, sep, obj = s[1:].partition('/')
    if sep == '' or obj == '':
        raise ValueError
    return container, obj
296

    
297
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object.
    
    Builds destination meta/permissions/public flag from the request
    headers, delegates to the backend's move_object/copy_object, and
    translates backend exceptions to API faults. Returns the new
    version id of the destination object.
    """
    
    meta, permissions, public = get_object_headers(request)
    # Optional specific source version to copy from (not used by move).
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        meta, False, permissions)
        else:
            version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        meta, False, permissions, src_version)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except AttributeError, e:
        # NOTE(review): relies on the backend raising AttributeError carrying
        # a '.data' attribute with the conflicting paths - confirm against
        # the backend implementation.
        raise Conflict('\n'.join(e.data) + '\n')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')
    if public is not None:
        # Apply any X-Object-Public setting to the destination separately.
        try:
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
        except NotAllowedError:
            raise Forbidden('Not allowed')
        except NameError:
            raise ItemNotFound('Object does not exist')
    return version_id
329

    
330
def get_int_parameter(p):
    """Parse 'p' as a non-negative integer.
    
    Returns None for None input, unparsable strings, or negative values.
    """
    if p is None:
        return None
    try:
        value = int(p)
    except ValueError:
        return None
    return value if value >= 0 else None
339

    
340
def get_content_length(request):
    """Return the request's Content-Length as a non-negative int.
    
    Raises LengthRequired when the header is missing or invalid.
    """
    length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if length is not None:
        return length
    raise LengthRequired('Missing or invalid Content-Length header')
345

    
346
def get_range(request, size):
    """Parse a Range header from the request.
    
    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.
    """
    
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None
    
    # Compile once outside the loop (was recompiled per range spec) and use
    # a raw string so '\d' is not an invalid escape sequence.
    p = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        m = p.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None
        
        if offset != '':
            offset = int(offset)
            if upto != '':
                # Explicit first-last range; length is inclusive.
                upto = int(upto)
                if offset > upto:
                    return None
                ret.append((offset, upto - offset + 1))
            else:
                # Open-ended range: from offset to end of object.
                ret.append((offset, size - offset))
        else:
            # Suffix range: the last 'upto' bytes.
            length = int(upto)
            ret.append((size - length, length))
    
    return ret
382

    
383
def get_content_range(request):
    """Parse a Content-Range header from the request.
    
    Either returns None, when the header is not existent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """
    
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None
    
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    p = re.compile(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
    m = p.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None
    offset = int(m.group('offset'))
    upto = m.group('upto')
    total = m.group('total')
    # An empty 'upto' or '*' total means "unknown".
    upto = int(upto) if upto != '' else None
    total = int(total) if total != '*' else None
    # Reject inverted or out-of-bounds ranges.
    if (upto is not None and offset > upto) or \
        (total is not None and offset >= total) or \
        (total is not None and upto is not None and upto >= total):
        return None
    
    length = (upto - offset + 1) if upto is not None else None
    return (offset, length, total)
422

    
423
def _parse_principal_list(value):
    """Parse a comma-separated principal list.
    
    Lowercases and dedupes entries, drops blanks, and collapses the
    list to ['*'] when a wildcard is present. May return an empty list,
    which the caller treats as invalid.
    """
    principals = list(set([v.replace(' ', '').lower() for v in value.split(',')]))
    if '' in principals:
        principals.remove('')
    if '*' in principals:
        principals = ['*']
    return principals

def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.
    
    Raises BadRequest on error.
    """
    
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None
    
    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '')
    
    ret = {}
    permissions = permissions.replace(' ', '')
    if permissions == '':
        return ret
    for perm in permissions.split(';'):
        if perm.startswith('read='):
            ret['read'] = _parse_principal_list(perm[5:])
            if len(ret['read']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        elif perm.startswith('write='):
            ret['write'] = _parse_principal_list(perm[6:])
            if len(ret['write']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        else:
            raise BadRequest('Bad X-Object-Sharing header value')
    
    # Keep duplicates only in write list.
    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])
    
    return ret
469

    
470
def get_public(request):
    """Parse an X-Object-Public header from the request.
    
    Raises BadRequest on error.
    """
    
    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None
    
    value = public.replace(' ', '').lower()
    if value == 'true':
        return True
    if value in ('false', ''):
        return False
    raise BadRequest('Bad X-Object-Public header value')
486

    
487
def raw_input_socket(request):
    """Return the socket for reading the rest of the request."""
    
    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    try:
        return request.environ['wsgi.input']
    except KeyError:
        raise ServiceUnavailable('Unknown server software')
496

    
497
# Upper bound, in bytes, on data accepted from a single request body
# (enforced by socket_read_iterator).
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB
498

    
499
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.
    
    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
    """
    
    sock = raw_input_socket(request)
    if length < 0: # Chunked transfers
        # Small version (server does the dechunking).
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
            # NOTE(review): 'length' is never updated in this loop, so the
            # guard never trips and MAX_UPLOAD_SIZE is not actually enforced
            # on this path - confirm whether that is intended.
            while length < MAX_UPLOAD_SIZE:
                data = sock.read(blocksize)
                if data == '':
                    return
                yield data
            raise BadRequest('Maximum size is reached')
        
        # Long version (do the dechunking).
        data = ''
        while length < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                # Fallback: read the size line byte-by-byte up to the newline.
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    chunk_length += sock.read(1)
                # NOTE(review): strip() result is discarded; harmless since
                # int(x, 16) tolerates surrounding whitespace, but
                # 'chunk_length = chunk_length.strip()' was likely intended.
                chunk_length.strip()
            # Drop any chunk extensions following ';'.
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length, 16)
            except Exception, e:
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                chunk_length -= len(chunk)
                # NOTE(review): 'length' starts negative on this path, so this
                # guard is never true and the running total never advances -
                # the MAX_UPLOAD_SIZE cap appears unenforced here as well.
                if length > 0:
                    length += len(chunk)
                data += chunk
                # Emit complete blocksize pieces as soon as they accumulate.
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        raise BadRequest('Maximum size is reached')
    else:
        # Non-chunked: exactly 'length' bytes are expected on the socket.
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
562

    
563
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way."""
    
    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend
    
    def put_data(self, length):
        # Flush one block of 'length' bytes to the backend, if buffered.
        if len(self.data) < length:
            return
        block, self.data = self.data[:length], self.data[length:]
        self.file.hashmap.append(self.backend.put_block(block))
        self.md5.update(block)
    
    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        # Start a fresh upload: reset digest, buffer and target file object.
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []
    
    def receive_data_chunk(self, raw_data, start):
        self.data += raw_data
        self.file.size += len(raw_data)
        # Write out any complete backend-sized block.
        self.put_data(self.request.backend.block_size)
        return None
    
    def file_complete(self, file_size):
        # Flush the trailing partial block, then seal the ETag.
        remaining = len(self.data)
        if remaining > 0:
            self.put_data(remaining)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
596

    
597
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.
    
    Read from the object using the offset and length provided in each entry of the range list.
    """
    
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges        # list of (offset, length) pairs to serve
        self.sizes = sizes          # sizes of the underlying files
        self.hashmaps = hashmaps    # block hashmaps of the underlying files
        self.boundary = boundary    # multipart boundary; '' when single range
        self.size = sum(self.sizes)
        
        # Current read position within the file/block structure.
        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1        # hash of the cached block (-1 = nothing cached)
        self.block = ''             # cached block data
        
        # -1 signals that the header for ranges[0] has not been emitted yet.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]
    
    def __iter__(self):
        return self
    
    def part_iterator(self):
        # Return the next piece of the current range, or raise StopIteration
        # when the current range is exhausted.
        if self.length > 0:
            # Get the file for the current offset.
            file_size = self.sizes[self.file_index]
            while self.offset >= file_size:
                self.offset -= file_size
                self.file_index += 1
                file_size = self.sizes[self.file_index]
            
            # Get the block for the current position.
            self.block_index = int(self.offset / self.backend.block_size)
            # Only hit the backend when the needed block is not cached.
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
                try:
                    self.block = self.backend.get_block(self.block_hash)
                except NameError:
                    raise ItemNotFound('Block does not exist')
            
            # Get the data from the block.
            bo = self.offset % self.backend.block_size
            bl = min(self.length, len(self.block) - bo)
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration
    
    def next(self):
        # Single range: plain data pass-through, no multipart framing.
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                # First call: jump straight to the except branch below, which
                # emits the multipart header for ranges[0].
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            # Current range exhausted (or very first call): emit the next
            # part header, or the closing footer after the last range.
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)
680

    
681
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data."""
    
    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        # Reject any range that falls outside the object.
        for offset, length in ranges:
            if (length <= 0 or length > size or
                offset < 0 or offset >= size or
                offset + length > size):
                raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # Modification time has passed instead.
                last_modified = parse_http_date(if_range)
                if last_modified != meta['modified']:
                    ranges = [(0, size)]
                    ret = 200
            except ValueError:
                if if_range != meta['ETag']:
                    ranges = [(0, size)]
                    ret = 200
    
    # Multipart replies need a unique boundary string.
    boundary = uuid.uuid4().hex if (ret == 206 and len(ranges) > 1) else ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response
727

    
728
def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset."""
    
    block_size = request.backend.block_size
    bi = int(offset / block_size)   # index of the target block
    bo = offset % block_size        # offset inside that block
    bl = min(len(data), block_size - bo)
    chunk = data[:bl]
    if bi < len(hashmap):
        # Existing block: patch it in place.
        hashmap[bi] = request.backend.update_block(hashmap[bi], chunk, bo)
    else:
        # New block: zero-pad up to the in-block offset.
        hashmap.append(request.backend.put_block(('\x00' * bo) + chunk))
    return bl # Return ammount of data written.
739

    
740
def hashmap_hash(request, hashmap):
    """Produce the root hash, treating the hashmap as a Merkle-like tree."""
    
    def subhash(d):
        h = hashlib.new(request.backend.hash_algorithm)
        h.update(d)
        return h.digest()
    
    if not hashmap:
        return hexlify(subhash(b''))
    if len(hashmap) == 1:
        return hashmap[0]
    
    # Round the leaf count up to the next power of two.
    s = 2
    while s < len(hashmap):
        s *= 2
    # Pad the leaf level with all-zero pseudo-hashes, then fold pairwise
    # until a single root digest remains.
    level = [unhexlify(x) for x in hashmap]
    level.extend([b'\x00' * len(level[0])] * (s - len(hashmap)))
    while len(level) > 1:
        level = [subhash(level[i] + level[i + 1])
                 for i in range(0, len(level), 2)]
    return hexlify(level[0])
761

    
762
def update_request_headers(request):
    """Decode percent-encoded names/values of the known custom headers."""
    meta = request.META
    decoded_prefixes = ('HTTP_X_ACCOUNT_META_', 'HTTP_X_ACCOUNT_GROUP_',
                        'HTTP_X_CONTAINER_META_', 'HTTP_X_OBJECT_META_')
    decoded_names = ('HTTP_X_OBJECT_MANIFEST', 'HTTP_X_OBJECT_SHARING',
                     'HTTP_X_COPY_FROM', 'HTTP_X_MOVE_FROM',
                     'HTTP_X_SOURCE_ACCOUNT', 'HTTP_X_SOURCE_OBJECT',
                     'HTTP_DESTINATION_ACCOUNT', 'HTTP_DESTINATION')
    # Iterate over a snapshot, since entries may be replaced in place.
    for k, v in meta.copy().items():
        if not (k.startswith(decoded_prefixes) or k in decoded_names):
            continue
        if '%' in k or '%' in v:
            del meta[k]
            meta[unquote(k)] = unquote(v)
775

    
776
def update_response_headers(request, response):
    """Set Content-Type/Length and percent-encode custom response headers."""
    # Content-Type follows the negotiated serialization.
    if request.serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif request.serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'
    
    # Multipart range replies stream their body, so no length is set.
    is_multipart = (response.has_header('Content-Type') and
                    response['Content-Type'].startswith('multipart/byteranges'))
    if not response.has_header('Content-Length') and not is_multipart:
        response['Content-Length'] = len(response.content)
    
    # URL-encode unicode in headers.
    encoded_prefixes = ('X-Account-Meta-', 'X-Account-Group-',
                        'X-Container-Meta-', 'X-Object-Meta-')
    encoded_names = ('X-Container-Object-Meta', 'X-Object-Manifest',
                     'X-Object-Sharing', 'X-Object-Shared-By')
    # Iterate over a snapshot, since headers are replaced while looping.
    for k, v in list(response.items()):
        if k.startswith(encoded_prefixes) or k in encoded_names:
            del response[k]
            response[quote(k)] = quote(v)
    
    if settings.TEST:
        response['Date'] = format_date_time(time())
800

    
801
def render_fault(request, fault):
    """Render a Fault exception as a plain-text HTTP response.
    
    In DEBUG/TEST mode the fault details are replaced with the current
    traceback to aid debugging.
    """
    if settings.DEBUG or settings.TEST:
        # BUG FIX: format_exc() takes an optional traceback depth limit,
        # not an exception object; call it without arguments to capture
        # the full trace of the exception being handled.
        fault.details = format_exc()
    
    request.serialization = 'text'
    data = '\n'.join((fault.message, fault.details)) + '\n'
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
810

    
811
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.
    
    Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
    """
    
    if not format_allowed:
        return 'text'
    
    # An explicit ?format= query parameter wins over content negotiation.
    fmt = request.GET.get('format')
    if fmt in ('json', 'xml'):
        return fmt
    
    # Fall back to the Accept header; parameters after ';' are ignored.
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        media_type = item.strip().partition(';')[0]
        if media_type == 'application/json':
            return 'json'
        if media_type in ('application/xml', 'text/xml'):
            return 'xml'
    
    return 'text'
834

    
835
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method."""
    
    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')
                
                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')
                
                # Format and check headers.
                update_request_headers(request)
                meta = dict((k, v) for k, v in request.META.iteritems()
                            if k.startswith('HTTP_'))
                if len(meta) > 90:
                    raise BadRequest('Too many headers.')
                for name, value in meta.iteritems():
                    if len(name) > 128:
                        raise BadRequest('Header name too large.')
                    if len(value) > 256:
                        raise BadRequest('Header value too large.')
                
                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = connect_backend()
                
                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except BaseException as e:
                # Anything unexpected becomes a 503, but is logged in full.
                logger.exception('Unexpected error: %s' % e)
                return render_fault(request, ServiceUnavailable('Unexpected error'))
            finally:
                # Release the backend connection even on error paths.
                backend = getattr(request, 'backend', None)
                if backend is not None:
                    backend.close()
        return wrapper
    return decorator