pithos/api/util.py @ a7ba12d7

# Copyright 2011-2012 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
#   1. Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer.
#
#   2. Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.

from functools import wraps
from time import time
from traceback import format_exc
from wsgiref.handlers import format_date_time
from binascii import hexlify, unhexlify
from datetime import datetime, tzinfo, timedelta
from urllib import quote, unquote

from django.conf import settings
from django.http import HttpResponse
from django.utils import simplejson as json
from django.utils.http import http_date, parse_etags
from django.utils.encoding import smart_unicode, smart_str
from django.core.files.uploadhandler import FileUploadHandler
from django.core.files.uploadedfile import UploadedFile

from pithos.lib.compat import parse_http_date_safe, parse_http_date

from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
                               Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
                               RangeNotSatisfiable, InternalServerError, NotImplemented)
from pithos.api.short_url import encode_url
from pithos.backends import connect_backend
from pithos.backends.base import NotAllowedError, QuotaError

import logging
import re
import hashlib
import uuid
import decimal


logger = logging.getLogger(__name__)


class UTC(tzinfo):
    def utcoffset(self, dt):
        return timedelta(0)

    def tzname(self, dt):
        return 'UTC'

    def dst(self, dt):
        return timedelta(0)

def json_encode_decimal(obj):
    if isinstance(obj, decimal.Decimal):
        return str(obj)
    raise TypeError(repr(obj) + " is not JSON serializable")
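
# Illustrative usage (assumed, not part of the original code): this function is
# meant as the 'default' hook of json.dumps, so Decimal values serialize as
# strings instead of raising TypeError.
#
#     >>> json.dumps(decimal.Decimal('10.5'), default=json_encode_decimal)
#     '"10.5"'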

def isoformat(d):
    """Return an ISO8601 date string that includes a timezone."""

    return d.replace(tzinfo=UTC()).isoformat()

def rename_meta_key(d, old, new):
    if old not in d:
        return
    d[new] = d[old]
    del(d[old])

def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.
    """

    if 'last_modified' in d:
        d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])

def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
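
# Illustrative examples (assumed values): format_header_key() turns an
# underscored key into the canonical dashed header form, while
# printable_header_dict() goes the other way for json/xml listings.
#
#     >>> format_header_key('x_account_group_managers')
#     'X-Account-Group-Managers'
#     >>> printable_header_dict({'X-Object-Meta-Color': 'blue'})
#     {'x_object_meta_color': 'blue'}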

def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""

    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    # TODO: Document or remove '~' replacing.
    return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])

def get_account_headers(request):
    meta = get_header_prefix(request, 'X-Account-Meta-')
    groups = {}
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
        n = k[16:].lower()
        if '-' in n or '_' in n:
            raise BadRequest('Bad characters in group name')
        groups[n] = v.replace(' ', '').split(',')
        while '' in groups[n]:
            groups[n].remove('')
    return meta, groups
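
# Illustrative example (hypothetical header values): a request carrying
#
#     X-Account-Group-Dev: user1@example.com, user2@example.com
#
# arrives in request.META as HTTP_X_ACCOUNT_GROUP_DEV, is reformatted by
# get_header_prefix() to 'X-Account-Group-Dev', and ends up as
#
#     groups == {'dev': ['user1@example.com', 'user2@example.com']}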

def put_account_headers(response, meta, groups, policy):
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in groups.iteritems():
        k = smart_str(k, strings_only=True)
        k = format_header_key('X-Account-Group-' + k)
        v = smart_str(','.join(v), strings_only=True)
        response[k] = v
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

def get_container_headers(request):
    meta = get_header_prefix(request, 'X-Container-Meta-')
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
    return meta, policy

def put_container_headers(request, response, meta, policy):
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

def get_object_headers(request):
    meta = get_header_prefix(request, 'X-Object-Meta-')
    if request.META.get('CONTENT_TYPE'):
        meta['Content-Type'] = request.META['CONTENT_TYPE']
    if request.META.get('HTTP_CONTENT_ENCODING'):
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
    return meta, get_sharing(request), get_public(request)

def put_object_headers(response, meta, restricted=False):
    if 'ETag' in meta:
        response['ETag'] = meta['ETag']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if not restricted:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
                  'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                  'X-Object-Public'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)
    else:
        for k in ('Content-Encoding', 'Content-Disposition'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)

def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest."""

    if 'X-Object-Manifest' in meta:
        etag = ''
        bytes = 0
        try:
            src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
            objects = request.backend.list_objects(request.user_uniq, v_account,
                                src_container, prefix=src_name, virtual=False)
            for x in objects:
                src_meta = request.backend.get_object_meta(request.user_uniq,
                                        v_account, src_container, x[0], 'pithos', x[1])
                if 'ETag' in src_meta:
                    etag += src_meta['ETag']
                bytes += src_meta['bytes']
        except:
            # Ignore errors.
            return
        meta['bytes'] = bytes
        md5 = hashlib.md5()
        md5.update(etag)
        meta['ETag'] = md5.hexdigest().lower()
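
# Note on the computation above: for a manifest object, 'bytes' becomes the sum
# of the part sizes and 'ETag' becomes the MD5 of the concatenated part ETag
# strings (hex digests), not the MD5 of the concatenated data. Illustrative
# sketch with hypothetical part ETags:
#
#     manifest_etag = hashlib.md5(etag_part_1 + etag_part_2).hexdigest().lower()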

def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return
    ret = []
    r = ','.join(perms.get('read', []))
    if r:
        ret.append('read=' + r)
    w = ','.join(perms.get('write', []))
    if w:
        ret.append('write=' + w)
    meta['X-Object-Sharing'] = '; '.join(ret)
    if '/'.join((v_account, v_container, v_object)) != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed

def update_public_meta(public, meta):
    if not public:
        return
    meta['X-Object-Public'] = '/public/' + encode_url(public)

def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set."""

    if 'modified' not in meta:
        return # TODO: Always return?

    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if_modified_since = parse_http_date_safe(if_modified_since)
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
        raise NotModified('Resource has not been modified')

    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
        raise PreconditionFailed('Resource has been modified')

def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set."""

    etag = meta.get('ETag', None)

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
            raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        if etag is not None:
            if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
                # TODO: Continue if an If-Modified-Since header is present.
                if request.method in ('HEAD', 'GET'):
                    raise NotModified('Resource ETag matches')
                raise PreconditionFailed('Resource exists or ETag matches')

def split_container_object_string(s):
    if not len(s) > 0 or s[0] != '/':
        raise ValueError
    s = s[1:]
    pos = s.find('/')
    if pos == -1 or pos == len(s) - 1:
        raise ValueError
    return s[:pos], s[(pos + 1):]
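
# Illustrative behaviour of split_container_object_string() (assumed paths):
#
#     >>> split_container_object_string('/pithos/docs/file.txt')
#     ('pithos', 'docs/file.txt')
#     >>> split_container_object_string('/pithos/')        # no object part
#     Traceback (most recent call last):
#         ...
#     ValueError
#     >>> split_container_object_string('pithos/file.txt')  # missing leading '/'
#     Traceback (most recent call last):
#         ...
#     ValueError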

def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object."""

    meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        'pithos', meta, False, permissions)
        else:
            version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        'pithos', meta, False, permissions, src_version)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except AttributeError, e:
        raise Conflict('\n'.join(e.data) + '\n')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')
    if public is not None:
        try:
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
        except NotAllowedError:
            raise Forbidden('Not allowed')
        except NameError:
            raise ItemNotFound('Object does not exist')
    return version_id

def get_int_parameter(p):
    if p is not None:
        try:
            p = int(p)
        except ValueError:
            return None
        if p < 0:
            return None
    return p

def get_content_length(request):
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if content_length is None:
        raise LengthRequired('Missing or invalid Content-Length header')
    return content_length

def get_range(request, size):
    """Parse a Range header from the request.

    Returns None when the header is absent or should be ignored;
    otherwise a list of (offset, length) tuples, which should be
    further validated.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
        m = p.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                ret.append((offset, upto - offset + 1))
            else:
                ret.append((offset, size - offset))
        else:
            length = int(upto)
            ret.append((size - length, length))

    return ret
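
# Illustrative results of get_range() for an object of size 1000 (header
# values assumed):
#
#     Range: bytes=0-499         ->  [(0, 500)]
#     Range: bytes=500-          ->  [(500, 500)]
#     Range: bytes=-200          ->  [(800, 200)]   # suffix range: last 200 bytes
#     Range: bytes=0-99,200-299  ->  [(0, 100), (200, 100)]
#     Range: bytes=-             ->  None           # ignored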

def get_content_range(request):
    """Parse a Content-Range header from the request.

    Returns None when the header is absent or should be ignored;
    otherwise an (offset, length, total) tuple. Check the result,
    as length and total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None

    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
    m = p.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None
    offset = int(m.group('offset'))
    upto = m.group('upto')
    total = m.group('total')
    if upto != '':
        upto = int(upto)
    else:
        upto = None
    if total != '*':
        total = int(total)
    else:
        total = None
    if (upto is not None and offset > upto) or \
        (total is not None and offset >= total) or \
        (total is not None and upto is not None and upto >= total):
        return None

    if upto is None:
        length = None
    else:
        length = upto - offset + 1
    return (offset, length, total)
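
# Illustrative results of get_content_range() (header values assumed):
#
#     Content-Range: bytes 0-499/1234   ->  (0, 500, 1234)
#     Content-Range: bytes 500-/*       ->  (500, None, None)   # open-ended update
#     Content-Range: bytes */*          ->  (None, None, None)
#     Content-Range: bytes 500-400/1234 ->  None                # invalid, offset > upto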

def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '')

    ret = {}
    permissions = permissions.replace(' ', '')
    if permissions == '':
        return ret
    for perm in (x for x in permissions.split(';')):
        if perm.startswith('read='):
            ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
            if '' in ret['read']:
                ret['read'].remove('')
            if '*' in ret['read']:
                ret['read'] = ['*']
            if len(ret['read']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        elif perm.startswith('write='):
            ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
            if '' in ret['write']:
                ret['write'].remove('')
            if '*' in ret['write']:
                ret['write'] = ['*']
            if len(ret['write']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        else:
            raise BadRequest('Bad X-Object-Sharing header value')

    # Keep duplicates only in write list.
    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret
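
# Illustrative parse of an X-Object-Sharing header (addresses assumed); note
# how a user present in both lists is kept only in the write list, as the
# comment above describes:
#
#     read=user1@example.com,user2@example.com;write=user1@example.com
#         ->  {'read': ['user2@example.com'], 'write': ['user1@example.com']}
#     read=*
#         ->  {'read': ['*']}
#     (empty header)
#         ->  {}   # clears all permissions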

def get_public(request):
    """Parse an X-Object-Public header from the request.

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    public = public.replace(' ', '').lower()
    if public == 'true':
        return True
    elif public == 'false' or public == '':
        return False
    raise BadRequest('Bad X-Object-Public header value')

def raw_input_socket(request):
    """Return the socket for reading the rest of the request."""

    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    if 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise NotImplemented('Unknown server software')

MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB

def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.
    """

    sock = raw_input_socket(request)
    if length < 0: # Chunked transfers
        # Small version (server does the dechunking).
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
            while length < MAX_UPLOAD_SIZE:
                data = sock.read(blocksize)
                if data == '':
                    return
                yield data
            raise BadRequest('Maximum size is reached')

        # Long version (do the dechunking).
        data = ''
        while length < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    chunk_length += sock.read(1)
                chunk_length = chunk_length.strip()
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length, 16)
            except Exception, e:
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                chunk_length -= len(chunk)
                if length > 0:
                    length += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        raise BadRequest('Maximum size is reached')
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
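
# Note on the 'long version' above: it parses HTTP/1.1 chunked transfer coding,
# where each chunk is '<hex-size>[;extension] CRLF <data> CRLF' and a zero-sized
# chunk terminates the body. Illustrative wire format (assumed payload, default
# blocksize):
#
#     4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n   ->  yields 'Wikipedia'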

class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way."""

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.request.backend.block_size)
        return None

    def file_complete(self, file_size):
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file

class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        if self.length > 0:
            # Get the file for the current offset.
            file_size = self.sizes[self.file_index]
            while self.offset >= file_size:
                self.offset -= file_size
                self.file_index += 1
                file_size = self.sizes[self.file_index]

            # Get the block for the current position.
            self.block_index = int(self.offset / self.backend.block_size)
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
                try:
                    self.block = self.backend.get_block(self.block_hash)
                except NameError:
                    raise ItemNotFound('Block does not exist')

            # Get the data from the block.
            bo = self.offset % self.backend.block_size
            bl = min(self.length, len(self.block) - bo)
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration

    def next(self):
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)
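
# Illustrative output framing for a two-range request (boundary value assumed):
# ObjectWrapper emits a part header before each range and a closing delimiter
# at the end, producing a multipart/byteranges body of the form
#
#     --BOUNDARY\r\n
#     Content-Range: bytes 0-99/1000\r\n
#     Content-Transfer-Encoding: binary\r\n
#     \r\n
#     <first 100 bytes>\r\n
#     --BOUNDARY\r\n
#     Content-Range: bytes 500-599/1000\r\n
#     Content-Transfer-Encoding: binary\r\n
#     \r\n
#     <next 100 bytes>\r\n
#     --BOUNDARY--\r\n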

def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data."""

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        check = [True for offset, length in ranges if
                    length <= 0 or length > size or
                    offset < 0 or offset >= size or
                    offset + length > size]
        if len(check) > 0:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # The If-Range value may be a modification time instead of an ETag.
                last_modified = parse_http_date(if_range)
                if last_modified != meta['modified']:
                    ranges = [(0, size)]
                    ret = 200
            except ValueError:
                if if_range != meta['ETag']:
                    ranges = [(0, size)]
                    ret = 200

    if ret == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    else:
        boundary = ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response

def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset."""

    bi = int(offset / request.backend.block_size)
    bo = offset % request.backend.block_size
    bl = min(len(data), request.backend.block_size - bo)
    if bi < len(hashmap):
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
    else:
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
    return bl # Return amount of data written.
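
# Illustrative arithmetic (assuming a 4 MiB backend block size): writing at
# offset 5242880 gives bi = 1 (second block) and bo = 1048576, so at most
# 4194304 - 1048576 = 3145728 bytes of 'data' land in that block; a write past
# the end of the hashmap creates a new block, zero-padded up to 'bo'.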

def hashmap_md5(request, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap."""

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = request.backend.block_size
    for bi, hash in enumerate(hashmap):
        data = request.backend.get_block(hash)
        if bi == len(hashmap) - 1:
            bs = size % bs
        pad = bs - min(len(data), bs)
        md5.update(data + ('\x00' * pad))
    return md5.hexdigest().lower()
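
# Illustrative check (assumed 4-byte blocks): for a 6-byte object stored as
# blocks 'abcd' and 'ef', the loop feeds 'abcd' then 'ef' to MD5, so the result
# equals hashlib.md5('abcdef').hexdigest(), the MD5 of the object's full
# contents; any short block is zero-padded to its expected length first.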

def get_backend():
    backend = connect_backend(db_module=settings.BACKEND_DB_MODULE,
                              db_connection=settings.BACKEND_DB_CONNECTION,
                              block_module=settings.BACKEND_BLOCK_MODULE,
                              block_path=settings.BACKEND_BLOCK_PATH)
    backend.default_policy['quota'] = settings.BACKEND_QUOTA
    backend.default_policy['versioning'] = settings.BACKEND_VERSIONING
    return backend

def update_request_headers(request):
    # Handle URL-encoded keys and values.
    meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for k, v in meta.iteritems():
        if len(k) > 128:
            raise BadRequest('Header name too large.')
        if len(v) > 256:
            raise BadRequest('Header value too large.')
        try:
            k.decode('ascii')
            v.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' in k or '%' in v:
            del(request.META[k])
            request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)

def update_response_headers(request, response):
    if request.serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif request.serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if (not response.has_header('Content-Length') and
        not (response.has_header('Content-Type') and
             response['Content-Type'].startswith('multipart/byteranges'))):
        response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    meta = response.items()
    for k, v in meta:
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
            k.startswith('X-Object-') or k.startswith('Content-')):
            del(response[k])
            response[quote(k)] = quote(v, safe='/=,:@; ')

    if settings.TEST:
        response['Date'] = format_date_time(time())

def render_fault(request, fault):
    if isinstance(fault, InternalServerError) and (settings.DEBUG or settings.TEST):
        fault.details = format_exc(fault)

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response

def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Valid formats are 'text' and 'json'; 'xml' is also allowed if
    'format_allowed' is True.
    """

    if not format_allowed:
        return 'text'

    format = request.GET.get('format')
    if format == 'json':
        return 'json'
    elif format == 'xml':
        return 'xml'

    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        accept, sep, rest = item.strip().partition(';')
        if accept == 'application/json':
            return 'json'
        elif accept == 'application/xml' or accept == 'text/xml':
            return 'xml'

    return 'text'
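
# Illustrative outcomes (assumed requests): with format_allowed=True,
# '?format=json' or 'Accept: application/json' selects 'json';
# '?format=xml', 'Accept: application/xml' or 'Accept: text/xml' selects 'xml';
# anything else falls back to 'text'. With format_allowed=False the result is
# always 'text'.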

def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method."""

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault, fault:
                return render_fault(request, fault)
            except BaseException, e:
                logger.exception('Unexpected error: %s' % e)
                fault = InternalServerError('Unexpected error')
                return render_fault(request, fault)
            finally:
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator
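
# Illustrative usage sketch (hypothetical view, not part of this module): the
# decorator validates the HTTP method and user, normalizes request headers,
# attaches request.serialization and request.backend, converts Fault exceptions
# into HTTP error responses and always closes the backend connection.
#
#     @api_method('GET', format_allowed=True)
#     def account_list(request):
#         # request.backend and request.serialization are available here.
#         return HttpResponse('', status=204)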