Statistics
| Branch: | Tag: | Revision:

root / pithos / api / util.py @ 22d7b01e

History | View | Annotate | Download (31.9 kB)

1
# Copyright 2011 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40

    
41
from django.conf import settings
42
from django.http import HttpResponse
43
from django.utils import simplejson as json
44
from django.utils.http import http_date, parse_etags
45
from django.utils.encoding import smart_str
46
from django.core.files.uploadhandler import FileUploadHandler
47
from django.core.files.uploadedfile import UploadedFile
48

    
49
from pithos.api.compat import parse_http_date_safe, parse_http_date
50
from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
51
                                Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
52
                                RangeNotSatisfiable, ServiceUnavailable)
53
from pithos.backends import connect_backend
54
from pithos.backends.base import NotAllowedError, QuotaError
55

    
56
import logging
57
import re
58
import hashlib
59
import uuid
60
import decimal
61

    
62

    
63
logger = logging.getLogger(__name__)
64

    
65

    
66
class UTC(tzinfo):
67
   def utcoffset(self, dt):
68
       return timedelta(0)
69

    
70
   def tzname(self, dt):
71
       return 'UTC'
72

    
73
   def dst(self, dt):
74
       return timedelta(0)
75

    
76
def json_encode_decimal(obj):
77
    if isinstance(obj, decimal.Decimal):
78
        return str(obj)
79
    raise TypeError(repr(obj) + " is not JSON serializable")
80

    
81
def isoformat(d):
82
   """Return an ISO8601 date string that includes a timezone."""
83

    
84
   return d.replace(tzinfo=UTC()).isoformat()
85

    
86
def rename_meta_key(d, old, new):
87
    if old not in d:
88
        return
89
    d[new] = d[old]
90
    del(d[old])
91

    
92
def printable_header_dict(d):
93
    """Format a meta dictionary for printing out json/xml.
94
    
95
    Convert all keys to lower case and replace dashes with underscores.
96
    Format 'last_modified' timestamp.
97
    """
98
    
99
    d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
100
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
101

    
102
def format_header_key(k):
103
    """Convert underscores to dashes and capitalize intra-dash strings."""
104
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
105

    
106
def get_header_prefix(request, prefix):
107
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""
108
    
109
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
110
    # TODO: Document or remove '~' replacing.
111
    return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])
112

    
113
def get_account_headers(request):
114
    meta = get_header_prefix(request, 'X-Account-Meta-')
115
    groups = {}
116
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
117
        n = k[16:].lower()
118
        if '-' in n or '_' in n:
119
            raise BadRequest('Bad characters in group name')
120
        groups[n] = v.replace(' ', '').split(',')
121
        while '' in groups[n]:
122
            groups[n].remove('')
123
    return meta, groups
124

    
125
def put_account_headers(response, meta, groups, policy):
126
    if 'count' in meta:
127
        response['X-Account-Container-Count'] = meta['count']
128
    if 'bytes' in meta:
129
        response['X-Account-Bytes-Used'] = meta['bytes']
130
    response['Last-Modified'] = http_date(int(meta['modified']))
131
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
132
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
133
    if 'until_timestamp' in meta:
134
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
135
    for k, v in groups.iteritems():
136
        k = smart_str(k, strings_only=True)
137
        k = format_header_key('X-Account-Group-' + k)
138
        v = smart_str(','.join(v), strings_only=True)
139
        response[k] = v
140
    for k, v in policy.iteritems():
141
        response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
142

    
143
def get_container_headers(request):
144
    meta = get_header_prefix(request, 'X-Container-Meta-')
145
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
146
    return meta, policy
147

    
148
def put_container_headers(request, response, meta, policy):
149
    if 'count' in meta:
150
        response['X-Container-Object-Count'] = meta['count']
151
    if 'bytes' in meta:
152
        response['X-Container-Bytes-Used'] = meta['bytes']
153
    response['Last-Modified'] = http_date(int(meta['modified']))
154
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
155
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
156
    l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
157
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
158
    response['X-Container-Block-Size'] = request.backend.block_size
159
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
160
    if 'until_timestamp' in meta:
161
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
162
    for k, v in policy.iteritems():
163
        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
164

    
165
def get_object_headers(request):
166
    meta = get_header_prefix(request, 'X-Object-Meta-')
167
    if request.META.get('CONTENT_TYPE'):
168
        meta['Content-Type'] = request.META['CONTENT_TYPE']
169
    if request.META.get('HTTP_CONTENT_ENCODING'):
170
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
171
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
172
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
173
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
174
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
175
    return meta, get_sharing(request), get_public(request)
176

    
177
def put_object_headers(response, meta, restricted=False):
178
    response['ETag'] = meta['ETag']
179
    response['Content-Length'] = meta['bytes']
180
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
181
    response['Last-Modified'] = http_date(int(meta['modified']))
182
    if not restricted:
183
        response['X-Object-Hash'] = meta['hash']
184
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
185
        response['X-Object-Version'] = meta['version']
186
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
187
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
188
            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
189
        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
190
                  'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
191
                  'X-Object-Public'):
192
            if k in meta:
193
                response[k] = smart_str(meta[k], strings_only=True)
194
    else:
195
        for k in ('Content-Encoding', 'Content-Disposition'):
196
            if k in meta:
197
                response[k] = meta[k]
198

    
199
def update_manifest_meta(request, v_account, meta):
200
    """Update metadata if the object has an X-Object-Manifest."""
201
    
202
    if 'X-Object-Manifest' in meta:
203
        etag = ''
204
        bytes = 0
205
        try:
206
            src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
207
            objects = request.backend.list_objects(request.user_uniq, v_account,
208
                                src_container, prefix=src_name, virtual=False)
209
            for x in objects:
210
                src_meta = request.backend.get_object_meta(request.user_uniq,
211
                                        v_account, src_container, x[0], x[1])
212
                etag += src_meta['ETag']
213
                bytes += src_meta['bytes']
214
        except:
215
            # Ignore errors.
216
            return
217
        meta['bytes'] = bytes
218
        md5 = hashlib.md5()
219
        md5.update(etag)
220
        meta['ETag'] = md5.hexdigest().lower()
221

    
222
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
223
    if permissions is None:
224
        return
225
    allowed, perm_path, perms = permissions
226
    if len(perms) == 0:
227
        return
228
    ret = []
229
    r = ','.join(perms.get('read', []))
230
    if r:
231
        ret.append('read=' + r)
232
    w = ','.join(perms.get('write', []))
233
    if w:
234
        ret.append('write=' + w)
235
    meta['X-Object-Sharing'] = '; '.join(ret)
236
    if '/'.join((v_account, v_container, v_object)) != perm_path:
237
        meta['X-Object-Shared-By'] = perm_path
238
    if request.user_uniq != v_account:
239
        meta['X-Object-Allowed-To'] = allowed
240

    
241
def update_public_meta(public, meta):
242
    if not public:
243
        return
244
    meta['X-Object-Public'] = public
245

    
246
def validate_modification_preconditions(request, meta):
247
    """Check that the modified timestamp conforms with the preconditions set."""
248
    
249
    if 'modified' not in meta:
250
        return # TODO: Always return?
251
    
252
    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
253
    if if_modified_since is not None:
254
        if_modified_since = parse_http_date_safe(if_modified_since)
255
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
256
        raise NotModified('Resource has not been modified')
257
    
258
    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
259
    if if_unmodified_since is not None:
260
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
261
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
262
        raise PreconditionFailed('Resource has been modified')
263

    
264
def validate_matching_preconditions(request, meta):
265
    """Check that the ETag conforms with the preconditions set."""
266
    
267
    etag = meta.get('ETag', None)
268
    
269
    if_match = request.META.get('HTTP_IF_MATCH')
270
    if if_match is not None:
271
        if etag is None:
272
            raise PreconditionFailed('Resource does not exist')
273
        if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
274
            raise PreconditionFailed('Resource ETag does not match')
275
    
276
    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
277
    if if_none_match is not None:
278
        # TODO: If this passes, must ignore If-Modified-Since header.
279
        if etag is not None:
280
            if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
281
                # TODO: Continue if an If-Modified-Since header is present.
282
                if request.method in ('HEAD', 'GET'):
283
                    raise NotModified('Resource ETag matches')
284
                raise PreconditionFailed('Resource exists or ETag matches')
285

    
286
def split_container_object_string(s):
287
    if not len(s) > 0 or s[0] != '/':
288
        raise ValueError
289
    s = s[1:]
290
    pos = s.find('/')
291
    if pos == -1 or pos == len(s) - 1:
292
        raise ValueError
293
    return s[:pos], s[(pos + 1):]
294

    
295
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
296
    """Copy or move an object."""
297
    
298
    meta, permissions, public = get_object_headers(request)
299
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
300
    try:
301
        if move:
302
            version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
303
                                                        dest_account, dest_container, dest_name,
304
                                                        meta, False, permissions)
305
        else:
306
            version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
307
                                                        dest_account, dest_container, dest_name,
308
                                                        meta, False, permissions, src_version)
309
    except NotAllowedError:
310
        raise Forbidden('Not allowed')
311
    except (NameError, IndexError):
312
        raise ItemNotFound('Container or object does not exist')
313
    except ValueError:
314
        raise BadRequest('Invalid sharing header')
315
    except AttributeError, e:
316
        raise Conflict('\n'.join(e.data) + '\n')
317
    except QuotaError:
318
        raise RequestEntityTooLarge('Quota exceeded')
319
    if public is not None:
320
        try:
321
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
322
        except NotAllowedError:
323
            raise Forbidden('Not allowed')
324
        except NameError:
325
            raise ItemNotFound('Object does not exist')
326
    return version_id
327

    
328
def get_int_parameter(p):
329
    if p is not None:
330
        try:
331
            p = int(p)
332
        except ValueError:
333
            return None
334
        if p < 0:
335
            return None
336
    return p
337

    
338
def get_content_length(request):
339
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
340
    if content_length is None:
341
        raise LengthRequired('Missing or invalid Content-Length header')
342
    return content_length
343

    
344
def get_range(request, size):
345
    """Parse a Range header from the request.
346
    
347
    Either returns None, when the header is not existent or should be ignored,
348
    or a list of (offset, length) tuples - should be further checked.
349
    """
350
    
351
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
352
    if not ranges.startswith('bytes='):
353
        return None
354
    
355
    ret = []
356
    for r in (x.strip() for x in ranges[6:].split(',')):
357
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
358
        m = p.match(r)
359
        if not m:
360
            return None
361
        offset = m.group('offset')
362
        upto = m.group('upto')
363
        if offset == '' and upto == '':
364
            return None
365
        
366
        if offset != '':
367
            offset = int(offset)
368
            if upto != '':
369
                upto = int(upto)
370
                if offset > upto:
371
                    return None
372
                ret.append((offset, upto - offset + 1))
373
            else:
374
                ret.append((offset, size - offset))
375
        else:
376
            length = int(upto)
377
            ret.append((size - length, length))
378
    
379
    return ret
380

    
381
def get_content_range(request):
382
    """Parse a Content-Range header from the request.
383
    
384
    Either returns None, when the header is not existent or should be ignored,
385
    or an (offset, length, total) tuple - check as length, total may be None.
386
    Returns (None, None, None) if the provided range is '*/*'.
387
    """
388
    
389
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
390
    if not ranges:
391
        return None
392
    
393
    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
394
    m = p.match(ranges)
395
    if not m:
396
        if ranges == 'bytes */*':
397
            return (None, None, None)
398
        return None
399
    offset = int(m.group('offset'))
400
    upto = m.group('upto')
401
    total = m.group('total')
402
    if upto != '':
403
        upto = int(upto)
404
    else:
405
        upto = None
406
    if total != '*':
407
        total = int(total)
408
    else:
409
        total = None
410
    if (upto is not None and offset > upto) or \
411
        (total is not None and offset >= total) or \
412
        (total is not None and upto is not None and upto >= total):
413
        return None
414
    
415
    if upto is None:
416
        length = None
417
    else:
418
        length = upto - offset + 1
419
    return (offset, length, total)
420

    
421
def get_sharing(request):
422
    """Parse an X-Object-Sharing header from the request.
423
    
424
    Raises BadRequest on error.
425
    """
426
    
427
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
428
    if permissions is None:
429
        return None
430
    
431
    # TODO: Document or remove '~' replacing.
432
    permissions = permissions.replace('~', '')
433
    
434
    ret = {}
435
    permissions = permissions.replace(' ', '')
436
    if permissions == '':
437
        return ret
438
    for perm in (x for x in permissions.split(';')):
439
        if perm.startswith('read='):
440
            ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
441
            if '' in ret['read']:
442
                ret['read'].remove('')
443
            if '*' in ret['read']:
444
                ret['read'] = ['*']
445
            if len(ret['read']) == 0:
446
                raise BadRequest('Bad X-Object-Sharing header value')
447
        elif perm.startswith('write='):
448
            ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
449
            if '' in ret['write']:
450
                ret['write'].remove('')
451
            if '*' in ret['write']:
452
                ret['write'] = ['*']
453
            if len(ret['write']) == 0:
454
                raise BadRequest('Bad X-Object-Sharing header value')
455
        else:
456
            raise BadRequest('Bad X-Object-Sharing header value')
457
    
458
    # Keep duplicates only in write list.
459
    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
460
    if dups:
461
        for x in dups:
462
            ret['read'].remove(x)
463
        if len(ret['read']) == 0:
464
            del(ret['read'])
465
    
466
    return ret
467

    
468
def get_public(request):
469
    """Parse an X-Object-Public header from the request.
470
    
471
    Raises BadRequest on error.
472
    """
473
    
474
    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
475
    if public is None:
476
        return None
477
    
478
    public = public.replace(' ', '').lower()
479
    if public == 'true':
480
        return True
481
    elif public == 'false' or public == '':
482
        return False
483
    raise BadRequest('Bad X-Object-Public header value')
484

    
485
def raw_input_socket(request):
486
    """Return the socket for reading the rest of the request."""
487
    
488
    server_software = request.META.get('SERVER_SOFTWARE')
489
    if server_software and server_software.startswith('mod_python'):
490
        return request._req
491
    if 'wsgi.input' in request.environ:
492
        return request.environ['wsgi.input']
493
    raise ServiceUnavailable('Unknown server software')
494

    
495
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB
496

    
497
def socket_read_iterator(request, length=0, blocksize=4096):
498
    """Return a maximum of blocksize data read from the socket in each iteration.
499
    
500
    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
501
    The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
502
    """
503
    
504
    sock = raw_input_socket(request)
505
    if length < 0: # Chunked transfers
506
        # Small version (server does the dechunking).
507
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
508
            while length < MAX_UPLOAD_SIZE:
509
                data = sock.read(blocksize)
510
                if data == '':
511
                    return
512
                yield data
513
            raise BadRequest('Maximum size is reached')
514
        
515
        # Long version (do the dechunking).
516
        data = ''
517
        while length < MAX_UPLOAD_SIZE:
518
            # Get chunk size.
519
            if hasattr(sock, 'readline'):
520
                chunk_length = sock.readline()
521
            else:
522
                chunk_length = ''
523
                while chunk_length[-1:] != '\n':
524
                    chunk_length += sock.read(1)
525
                chunk_length.strip()
526
            pos = chunk_length.find(';')
527
            if pos >= 0:
528
                chunk_length = chunk_length[:pos]
529
            try:
530
                chunk_length = int(chunk_length, 16)
531
            except Exception, e:
532
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
533
            # Check if done.
534
            if chunk_length == 0:
535
                if len(data) > 0:
536
                    yield data
537
                return
538
            # Get the actual data.
539
            while chunk_length > 0:
540
                chunk = sock.read(min(chunk_length, blocksize))
541
                chunk_length -= len(chunk)
542
                if length > 0:
543
                    length += len(chunk)
544
                data += chunk
545
                if len(data) >= blocksize:
546
                    ret = data[:blocksize]
547
                    data = data[blocksize:]
548
                    yield ret
549
            sock.read(2) # CRLF
550
        raise BadRequest('Maximum size is reached')
551
    else:
552
        if length > MAX_UPLOAD_SIZE:
553
            raise BadRequest('Maximum size is reached')
554
        while length > 0:
555
            data = sock.read(min(length, blocksize))
556
            if not data:
557
                raise BadRequest()
558
            length -= len(data)
559
            yield data
560

    
561
class SaveToBackendHandler(FileUploadHandler):
562
    """Handle a file from an HTML form the django way."""
563
    
564
    def __init__(self, request=None):
565
        super(SaveToBackendHandler, self).__init__(request)
566
        self.backend = request.backend
567
    
568
    def put_data(self, length):
569
        if len(self.data) >= length:
570
            block = self.data[:length]
571
            self.file.hashmap.append(self.backend.put_block(block))
572
            self.md5.update(block)
573
            self.data = self.data[length:]
574
    
575
    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
576
        self.md5 = hashlib.md5()        
577
        self.data = ''
578
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
579
        self.file.size = 0
580
        self.file.hashmap = []
581
    
582
    def receive_data_chunk(self, raw_data, start):
583
        self.data += raw_data
584
        self.file.size += len(raw_data)
585
        self.put_data(self.request.backend.block_size)
586
        return None
587
    
588
    def file_complete(self, file_size):
589
        l = len(self.data)
590
        if l > 0:
591
            self.put_data(l)
592
        self.file.etag = self.md5.hexdigest().lower()
593
        return self.file
594

    
595
class ObjectWrapper(object):
596
    """Return the object's data block-per-block in each iteration.
597
    
598
    Read from the object using the offset and length provided in each entry of the range list.
599
    """
600
    
601
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
602
        self.backend = backend
603
        self.ranges = ranges
604
        self.sizes = sizes
605
        self.hashmaps = hashmaps
606
        self.boundary = boundary
607
        self.size = sum(self.sizes)
608
        
609
        self.file_index = 0
610
        self.block_index = 0
611
        self.block_hash = -1
612
        self.block = ''
613
        
614
        self.range_index = -1
615
        self.offset, self.length = self.ranges[0]
616
    
617
    def __iter__(self):
618
        return self
619
    
620
    def part_iterator(self):
621
        if self.length > 0:
622
            # Get the file for the current offset.
623
            file_size = self.sizes[self.file_index]
624
            while self.offset >= file_size:
625
                self.offset -= file_size
626
                self.file_index += 1
627
                file_size = self.sizes[self.file_index]
628
            
629
            # Get the block for the current position.
630
            self.block_index = int(self.offset / self.backend.block_size)
631
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
632
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
633
                try:
634
                    self.block = self.backend.get_block(self.block_hash)
635
                except NameError:
636
                    raise ItemNotFound('Block does not exist')
637
            
638
            # Get the data from the block.
639
            bo = self.offset % self.backend.block_size
640
            bl = min(self.length, len(self.block) - bo)
641
            data = self.block[bo:bo + bl]
642
            self.offset += bl
643
            self.length -= bl
644
            return data
645
        else:
646
            raise StopIteration
647
    
648
    def next(self):
649
        if len(self.ranges) == 1:
650
            return self.part_iterator()
651
        if self.range_index == len(self.ranges):
652
            raise StopIteration
653
        try:
654
            if self.range_index == -1:
655
                raise StopIteration
656
            return self.part_iterator()
657
        except StopIteration:
658
            self.range_index += 1
659
            out = []
660
            if self.range_index < len(self.ranges):
661
                # Part header.
662
                self.offset, self.length = self.ranges[self.range_index]
663
                self.file_index = 0
664
                if self.range_index > 0:
665
                    out.append('')
666
                out.append('--' + self.boundary)
667
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
668
                out.append('Content-Transfer-Encoding: binary')
669
                out.append('')
670
                out.append('')
671
                return '\r\n'.join(out)
672
            else:
673
                # Footer.
674
                out.append('')
675
                out.append('--' + self.boundary + '--')
676
                out.append('')
677
                return '\r\n'.join(out)
678

    
679
def object_data_response(request, sizes, hashmaps, meta, public=False):
680
    """Get the HttpResponse object for replying with the object's data."""
681
    
682
    # Range handling.
683
    size = sum(sizes)
684
    ranges = get_range(request, size)
685
    if ranges is None:
686
        ranges = [(0, size)]
687
        ret = 200
688
    else:
689
        check = [True for offset, length in ranges if
690
                    length <= 0 or length > size or
691
                    offset < 0 or offset >= size or
692
                    offset + length > size]
693
        if len(check) > 0:
694
            raise RangeNotSatisfiable('Requested range exceeds object limits')
695
        ret = 206
696
        if_range = request.META.get('HTTP_IF_RANGE')
697
        if if_range:
698
            try:
699
                # Modification time has passed instead.
700
                last_modified = parse_http_date(if_range)
701
                if last_modified != meta['modified']:
702
                    ranges = [(0, size)]
703
                    ret = 200
704
            except ValueError:
705
                if if_range != meta['ETag']:
706
                    ranges = [(0, size)]
707
                    ret = 200
708
    
709
    if ret == 206 and len(ranges) > 1:
710
        boundary = uuid.uuid4().hex
711
    else:
712
        boundary = ''
713
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
714
    response = HttpResponse(wrapper, status=ret)
715
    put_object_headers(response, meta, public)
716
    if ret == 206:
717
        if len(ranges) == 1:
718
            offset, length = ranges[0]
719
            response['Content-Length'] = length # Update with the correct length.
720
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
721
        else:
722
            del(response['Content-Length'])
723
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
724
    return response
725

    
726
def put_object_block(request, hashmap, data, offset):
727
    """Put one block of data at the given offset."""
728
    
729
    bi = int(offset / request.backend.block_size)
730
    bo = offset % request.backend.block_size
731
    bl = min(len(data), request.backend.block_size - bo)
732
    if bi < len(hashmap):
733
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
734
    else:
735
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
736
    return bl # Return ammount of data written.
737

    
738
def hashmap_hash(request, hashmap):
739
    """Produce the root hash, treating the hashmap as a Merkle-like tree."""
740
    
741
    def subhash(d):
742
        h = hashlib.new(request.backend.hash_algorithm)
743
        h.update(d)
744
        return h.digest()
745
    
746
    if len(hashmap) == 0:
747
        return hexlify(subhash(''))
748
    if len(hashmap) == 1:
749
        return hashmap[0]
750
    
751
    s = 2
752
    while s < len(hashmap):
753
        s = s * 2
754
    h = [unhexlify(x) for x in hashmap]
755
    h += [('\x00' * len(h[0]))] * (s - len(hashmap))
756
    while len(h) > 1:
757
        h = [subhash(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
758
    return hexlify(h[0])
759

    
760
def update_response_headers(request, response):
761
    if request.serialization == 'xml':
762
        response['Content-Type'] = 'application/xml; charset=UTF-8'
763
    elif request.serialization == 'json':
764
        response['Content-Type'] = 'application/json; charset=UTF-8'
765
    elif not response['Content-Type']:
766
        response['Content-Type'] = 'text/plain; charset=UTF-8'
767
    
768
    if not response.has_header('Content-Length') and not (response.has_header('Content-Type') and response['Content-Type'].startswith('multipart/byteranges')):
769
        response['Content-Length'] = len(response.content)
770
    
771
    if settings.TEST:
772
        response['Date'] = format_date_time(time())
773

    
774
def render_fault(request, fault):
775
    if settings.DEBUG or settings.TEST:
776
        fault.details = format_exc(fault)
777
    
778
    request.serialization = 'text'
779
    data = '\n'.join((fault.message, fault.details)) + '\n'
780
    response = HttpResponse(data, status=fault.code)
781
    update_response_headers(request, response)
782
    return response
783

    
784
def request_serialization(request, format_allowed=False):
785
    """Return the serialization format requested.
786
    
787
    Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
788
    """
789
    
790
    if not format_allowed:
791
        return 'text'
792
    
793
    format = request.GET.get('format')
794
    if format == 'json':
795
        return 'json'
796
    elif format == 'xml':
797
        return 'xml'
798
    
799
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
800
        accept, sep, rest = item.strip().partition(';')
801
        if accept == 'application/json':
802
            return 'json'
803
        elif accept == 'application/xml' or accept == 'text/xml':
804
            return 'xml'
805
    
806
    return 'text'
807

    
808
def api_method(http_method=None, format_allowed=False, user_required=True):
809
    """Decorator function for views that implement an API method."""
810
    
811
    def decorator(func):
812
        @wraps(func)
813
        def wrapper(request, *args, **kwargs):
814
            try:
815
                if http_method and request.method != http_method:
816
                    raise BadRequest('Method not allowed.')
817
                if user_required and getattr(request, 'user', None) is None:
818
                    raise Unauthorized('Access denied')
819
                
820
                # The args variable may contain up to (account, container, object).
821
                if len(args) > 1 and len(args[1]) > 256:
822
                    raise BadRequest('Container name too large.')
823
                if len(args) > 2 and len(args[2]) > 1024:
824
                    raise BadRequest('Object name too large.')
825
                
826
                # Fill in custom request variables.
827
                request.serialization = request_serialization(request, format_allowed)
828
                request.backend = connect_backend()
829
                
830
                response = func(request, *args, **kwargs)
831
                update_response_headers(request, response)
832
                return response
833
            except Fault, fault:
834
                return render_fault(request, fault)
835
            except BaseException, e:
836
                logger.exception('Unexpected error: %s' % e)
837
                fault = ServiceUnavailable('Unexpected error')
838
                return render_fault(request, fault)
839
            finally:
840
                if getattr(request, 'backend', None) is not None:
841
                    request.backend.close()
842
        return wrapper
843
    return decorator