snf-pithos-app / pithos / api / util.py @ 8c306eab

# Copyright 2011 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
#   1. Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer.
#
#   2. Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.

    
from functools import wraps
from time import time
from traceback import format_exc
from wsgiref.handlers import format_date_time
from binascii import hexlify, unhexlify
from datetime import datetime, tzinfo, timedelta
from urllib import quote, unquote

from django.conf import settings
from django.http import HttpResponse
from django.utils import simplejson as json
from django.utils.http import http_date, parse_etags
from django.utils.encoding import smart_unicode, smart_str
from django.core.files.uploadhandler import FileUploadHandler
from django.core.files.uploadedfile import UploadedFile

from pithos.lib.compat import parse_http_date_safe, parse_http_date
from pithos.lib.hashmap import HashMap

from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
                               Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
                               RangeNotSatisfiable, ServiceUnavailable)
from pithos.api.short_url import encode_url
from pithos.backends import connect_backend
from pithos.backends.base import NotAllowedError, QuotaError

import logging
import re
import hashlib
import uuid
import decimal


logger = logging.getLogger(__name__)

    
class UTC(tzinfo):
    def utcoffset(self, dt):
        return timedelta(0)

    def tzname(self, dt):
        return 'UTC'

    def dst(self, dt):
        return timedelta(0)

    
def json_encode_decimal(obj):
    if isinstance(obj, decimal.Decimal):
        return str(obj)
    raise TypeError(repr(obj) + " is not JSON serializable")
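
# Illustrative note (not part of the original module): json_encode_decimal is
# intended as the 'default' hook of a JSON encoder, so that Decimal values
# (e.g. quotas) can be serialized. A hypothetical call:
#
#     json.dumps({'quota': decimal.Decimal('1073741824')},
#                default=json_encode_decimal)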

    
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone."""

    return d.replace(tzinfo=UTC()).isoformat()

    
def rename_meta_key(d, old, new):
    if old not in d:
        return
    d[new] = d[old]
    del(d[old])

    
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.
    """

    if 'last_modified' in d:
        d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
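
# Illustrative example (hypothetical values): {'Content-Type': 'text/plain',
# 'last_modified': 1325376000} comes out as {'content_type': 'text/plain',
# 'last_modified': '2012-01-01T00:00:00+00:00'} -- the exact timestamp string
# depends on the server's local timezone.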

    
def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
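
# For illustration (the example input is hypothetical): format_header_key('x_account_meta_quota')
# returns 'X-Account-Meta-Quota', mapping a lower-cased WSGI-style name back to
# its canonical dashed header form.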

    
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""

    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    # TODO: Document or remove '~' replacing.
    return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])

    
def get_account_headers(request):
    meta = get_header_prefix(request, 'X-Account-Meta-')
    groups = {}
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
        n = k[16:].lower()
        if '-' in n or '_' in n:
            raise BadRequest('Bad characters in group name')
        groups[n] = v.replace(' ', '').split(',')
        while '' in groups[n]:
            groups[n].remove('')
    return meta, groups
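
# Illustrative sketch (the header value is hypothetical): a request carrying
# 'X-Account-Group-Devs: alice,bob' arrives in request.META as
# 'HTTP_X_ACCOUNT_GROUP_DEVS' and is returned here as groups == {'devs': ['alice', 'bob']}.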

    
def put_account_headers(response, meta, groups, policy):
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in groups.iteritems():
        k = smart_str(k, strings_only=True)
        k = format_header_key('X-Account-Group-' + k)
        v = smart_str(','.join(v), strings_only=True)
        response[k] = v
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

    
def get_container_headers(request):
    meta = get_header_prefix(request, 'X-Container-Meta-')
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
    return meta, policy

    
def put_container_headers(request, response, meta, policy):
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

    
def get_object_headers(request):
    meta = get_header_prefix(request, 'X-Object-Meta-')
    if request.META.get('CONTENT_TYPE'):
        meta['Content-Type'] = request.META['CONTENT_TYPE']
    if request.META.get('HTTP_CONTENT_ENCODING'):
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
    return meta, get_sharing(request), get_public(request)

    
def put_object_headers(response, meta, restricted=False):
    response['ETag'] = meta['ETag'] if 'ETag' in meta else meta['hash']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if not restricted:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
                  'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                  'X-Object-Public'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)
    else:
        for k in ('Content-Encoding', 'Content-Disposition'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)

    
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest."""

    if 'X-Object-Manifest' in meta:
        etag = ''
        bytes = 0
        try:
            src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
            objects = request.backend.list_objects(request.user_uniq, v_account,
                                src_container, prefix=src_name, virtual=False)
            for x in objects:
                src_meta = request.backend.get_object_meta(request.user_uniq,
                                        v_account, src_container, x[0], 'pithos', x[1])
                if 'ETag' in src_meta:
                    etag += src_meta['ETag']
                bytes += src_meta['bytes']
        except:
            # Ignore errors.
            return
        meta['bytes'] = bytes
        md5 = hashlib.md5()
        md5.update(etag)
        meta['ETag'] = md5.hexdigest().lower()

    
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return
    ret = []
    r = ','.join(perms.get('read', []))
    if r:
        ret.append('read=' + r)
    w = ','.join(perms.get('write', []))
    if w:
        ret.append('write=' + w)
    meta['X-Object-Sharing'] = '; '.join(ret)
    if '/'.join((v_account, v_container, v_object)) != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed

    
def update_public_meta(public, meta):
    if not public:
        return
    meta['X-Object-Public'] = '/public/' + encode_url(public)

    
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set."""

    if 'modified' not in meta:
        return # TODO: Always return?

    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if_modified_since = parse_http_date_safe(if_modified_since)
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
        raise NotModified('Resource has not been modified')

    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
        raise PreconditionFailed('Resource has been modified')

    
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set."""

    etag = meta.get('ETag', None)

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
            raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        if etag is not None:
            if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
                # TODO: Continue if an If-Modified-Since header is present.
                if request.method in ('HEAD', 'GET'):
                    raise NotModified('Resource ETag matches')
                raise PreconditionFailed('Resource exists or ETag matches')

    
def split_container_object_string(s):
    if not len(s) > 0 or s[0] != '/':
        raise ValueError
    s = s[1:]
    pos = s.find('/')
    if pos == -1 or pos == len(s) - 1:
        raise ValueError
    return s[:pos], s[(pos + 1):]
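
# For illustration (the path is hypothetical): split_container_object_string('/pithos/docs/readme.txt')
# returns ('pithos', 'docs/readme.txt'); a missing leading slash or an empty
# object part raises ValueError.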

    
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object."""

    meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        'pithos', meta, False, permissions)
        else:
            version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        'pithos', meta, False, permissions, src_version)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except AttributeError, e:
        raise Conflict('\n'.join(e.data) + '\n')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')
    if public is not None:
        try:
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
        except NotAllowedError:
            raise Forbidden('Not allowed')
        except NameError:
            raise ItemNotFound('Object does not exist')
    return version_id

    
def get_int_parameter(p):
    if p is not None:
        try:
            p = int(p)
        except ValueError:
            return None
        if p < 0:
            return None
    return p

    
def get_content_length(request):
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if content_length is None:
        raise LengthRequired('Missing or invalid Content-Length header')
    return content_length

    
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is absent or should be ignored,
    or a list of (offset, length) tuples, which should be checked further.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
        m = p.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                ret.append((offset, upto - offset + 1))
            else:
                ret.append((offset, size - offset))
        else:
            length = int(upto)
            ret.append((size - length, length))

    return ret
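
# Illustrative examples (hypothetical values), for an object of size 1000:
#   'Range: bytes=0-499' -> [(0, 500)]
#   'Range: bytes=500-'  -> [(500, 500)]
#   'Range: bytes=-200'  -> [(800, 200)]
# A malformed or unsupported header simply yields None, i.e. the whole object.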

    
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header is absent or should be ignored,
    or an (offset, length, total) tuple; check the result, as length and total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None

    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
    m = p.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None
    offset = int(m.group('offset'))
    upto = m.group('upto')
    total = m.group('total')
    if upto != '':
        upto = int(upto)
    else:
        upto = None
    if total != '*':
        total = int(total)
    else:
        total = None
    if (upto is not None and offset > upto) or \
        (total is not None and offset >= total) or \
        (total is not None and upto is not None and upto >= total):
        return None

    if upto is None:
        length = None
    else:
        length = upto - offset + 1
    return (offset, length, total)
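
# Illustrative examples (hypothetical values): 'Content-Range: bytes 0-499/1234'
# parses to (0, 500, 1234), 'bytes 500-/1234' to (500, None, 1234) and
# 'bytes */*' to (None, None, None).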

    
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '')

    ret = {}
    permissions = permissions.replace(' ', '')
    if permissions == '':
        return ret
    for perm in (x for x in permissions.split(';')):
        if perm.startswith('read='):
            ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
            if '' in ret['read']:
                ret['read'].remove('')
            if '*' in ret['read']:
                ret['read'] = ['*']
            if len(ret['read']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        elif perm.startswith('write='):
            ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
            if '' in ret['write']:
                ret['write'].remove('')
            if '*' in ret['write']:
                ret['write'] = ['*']
            if len(ret['write']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        else:
            raise BadRequest('Bad X-Object-Sharing header value')

    # Keep duplicates only in write list.
    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret
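
# Illustrative example (the header value is hypothetical):
# 'X-Object-Sharing: read=*;write=alice' parses to {'read': ['*'], 'write': ['alice']};
# an empty header value parses to {}.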

    
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    public = public.replace(' ', '').lower()
    if public == 'true':
        return True
    elif public == 'false' or public == '':
        return False
    raise BadRequest('Bad X-Object-Public header value')

    
def raw_input_socket(request):
    """Return the socket for reading the rest of the request."""

    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    if 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise ServiceUnavailable('Unknown server software')

    
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB

    
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return at most blocksize bytes read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.
    """

    sock = raw_input_socket(request)
    if length < 0: # Chunked transfers
        # Small version (server does the dechunking).
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
            while length < MAX_UPLOAD_SIZE:
                data = sock.read(blocksize)
                if data == '':
                    return
                yield data
            raise BadRequest('Maximum size is reached')

        # Long version (do the dechunking).
        data = ''
        while length < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    chunk_length += sock.read(1)
                chunk_length = chunk_length.strip()
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length, 16)
            except Exception, e:
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                chunk_length -= len(chunk)
                if length > 0:
                    length += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        raise BadRequest('Maximum size is reached')
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data

    
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way."""

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.request.backend.block_size)
        return None

    def file_complete(self, file_size):
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file

    
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        if self.length > 0:
            # Get the file for the current offset.
            file_size = self.sizes[self.file_index]
            while self.offset >= file_size:
                self.offset -= file_size
                self.file_index += 1
                file_size = self.sizes[self.file_index]

            # Get the block for the current position.
            self.block_index = int(self.offset / self.backend.block_size)
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
                try:
                    self.block = self.backend.get_block(self.block_hash)
                except NameError:
                    raise ItemNotFound('Block does not exist')

            # Get the data from the block.
            bo = self.offset % self.backend.block_size
            bl = min(self.length, len(self.block) - bo)
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration

    def next(self):
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)

    
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data."""

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        check = [True for offset, length in ranges if
                    length <= 0 or length > size or
                    offset < 0 or offset >= size or
                    offset + length > size]
        if len(check) > 0:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # Modification time has passed instead.
                last_modified = parse_http_date(if_range)
                if last_modified != meta['modified']:
                    ranges = [(0, size)]
                    ret = 200
            except ValueError:
                if if_range != meta['ETag']:
                    ranges = [(0, size)]
                    ret = 200

    if ret == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    else:
        boundary = ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response

    
def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset."""

    bi = int(offset / request.backend.block_size)
    bo = offset % request.backend.block_size
    bl = min(len(data), request.backend.block_size - bo)
    if bi < len(hashmap):
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
    else:
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
    return bl # Return amount of data written.

    
#def hashmap_hash(request, hashmap):
#    """Produce the root hash, treating the hashmap as a Merkle-like tree."""
#
#    map = HashMap(request.backend.block_size, request.backend.hash_algorithm)
#    map.extend([unhexlify(x) for x in hashmap])
#    return hexlify(map.hash())

    
def hashmap_md5(request, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap."""

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = request.backend.block_size
    for bi, hash in enumerate(hashmap):
        data = request.backend.get_block(hash)
        if bi == len(hashmap) - 1:
            bs = size % bs
        pad = bs - min(len(data), bs)
        md5.update(data + ('\x00' * pad))
    return md5.hexdigest().lower()

    
def get_backend():
    backend = connect_backend(db_module=settings.BACKEND_DB_MODULE,
                              db_connection=settings.BACKEND_DB_CONNECTION,
                              block_module=settings.BACKEND_BLOCK_MODULE,
                              block_path=settings.BACKEND_BLOCK_PATH)
    backend.default_policy['quota'] = settings.BACKEND_QUOTA
    backend.default_policy['versioning'] = settings.BACKEND_VERSIONING
    return backend

    
def update_request_headers(request):
    # Handle URL-encoded keys and values.
    meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for k, v in meta.iteritems():
        if len(k) > 128:
            raise BadRequest('Header name too large.')
        if len(v) > 256:
            raise BadRequest('Header value too large.')
        try:
            k.decode('ascii')
            v.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' in k or '%' in v:
            del(request.META[k])
            request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)

    
def update_response_headers(request, response):
    if request.serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif request.serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if (not response.has_header('Content-Length') and
        not (response.has_header('Content-Type') and
             response['Content-Type'].startswith('multipart/byteranges'))):
        response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    meta = response.items()
    for k, v in meta:
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
            k.startswith('X-Object-') or k.startswith('Content-')):
            del(response[k])
            response[quote(k)] = quote(v, safe='/=,:@; ')

    if settings.TEST:
        response['Date'] = format_date_time(time())

    
def render_fault(request, fault):
    if settings.DEBUG or settings.TEST:
        fault.details = format_exc()

    request.serialization = 'text'
    data = '\n'.join((fault.message, fault.details)) + '\n'
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response

    
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Valid formats are 'text' and, if 'format_allowed' is True, 'json' and 'xml'.
    """

    if not format_allowed:
        return 'text'

    format = request.GET.get('format')
    if format == 'json':
        return 'json'
    elif format == 'xml':
        return 'xml'

    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        accept, sep, rest = item.strip().partition(';')
        if accept == 'application/json':
            return 'json'
        elif accept == 'application/xml' or accept == 'text/xml':
            return 'xml'

    return 'text'
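
# For illustration (the request values are hypothetical): with format_allowed=True,
# '?format=json' or an 'Accept: application/json' header selects 'json',
# 'application/xml' or 'text/xml' selects 'xml', and anything else falls back to 'text'.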

    
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method."""

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault, fault:
                return render_fault(request, fault)
            except BaseException, e:
                logger.exception('Unexpected error: %s' % e)
                fault = ServiceUnavailable('Unexpected error')
                return render_fault(request, fault)
            finally:
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator
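
# Illustrative usage (the view name is hypothetical; actual API views live
# elsewhere in the application): each API entry point is wrapped with this
# decorator, roughly as:
#
#     @api_method('GET', format_allowed=True)
#     def container_list(request, v_account):
#         ...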