Statistics
| Branch: | Tag: | Revision:

root / pithos / api / util.py @ 53cff70c

History | View | Annotate | Download (34 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.utils import simplejson as json
45
from django.utils.http import http_date, parse_etags
46
from django.utils.encoding import smart_unicode, smart_str
47
from django.core.files.uploadhandler import FileUploadHandler
48
from django.core.files.uploadedfile import UploadedFile
49

    
50
from pithos.lib.compat import parse_http_date_safe, parse_http_date
51

    
52
from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
53
                                Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
54
                                RangeNotSatisfiable, InternalServerError, NotImplemented)
55
from pithos.api.short_url import encode_url
56
from pithos.backends import connect_backend
57
from pithos.backends.base import NotAllowedError, QuotaError
58

    
59
import logging
60
import re
61
import hashlib
62
import uuid
63
import decimal
64

    
65

    
66
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
67

    
68

    
69
class UTC(tzinfo):
    """A tzinfo with a zero UTC offset, no DST and the name 'UTC'."""

    def tzname(self, dt):
        """Return the fixed zone name, whatever 'dt' is."""
        return 'UTC'

    def utcoffset(self, dt):
        """Return a zero offset from UTC."""
        return timedelta()

    def dst(self, dt):
        """Return a zero daylight-saving adjustment."""
        return timedelta()
78

    
79
def json_encode_decimal(obj):
    """Return the string form of a Decimal.

    Raises TypeError for any object that is not a decimal.Decimal.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
83

    
84
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone.

    The UTC tzinfo is attached to 'd' as it is; the date and time fields
    are not converted.
    """
    stamped = d.replace(tzinfo=UTC())
    return stamped.isoformat()
88

    
89
def rename_meta_key(d, old, new):
    """Move the value stored under 'old' in dict 'd' to the key 'new'.

    Does nothing when 'old' is not in 'd'. An existing 'new' entry is
    overwritten. The dict is modified in place; nothing is returned.
    """
    if old not in d:
        return
    # Pop before assigning so that old == new keeps the entry instead of
    # deleting it right after it was (re)assigned.
    d[new] = d.pop(old)
94

    
95
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.

    Returns a new dict. Note that the 'last_modified' entry of 'd' itself is
    replaced by its formatted string as a side effect.
    """

    if 'last_modified' in d:
        # isoformat() labels the datetime as UTC without converting it, so the
        # timestamp must be expanded in UTC rather than in server local time.
        d['last_modified'] = isoformat(datetime.utcfromtimestamp(d['last_modified']))
    return dict((k.lower().replace('-', '_'), v) for k, v in d.items())
105

    
106
def format_header_key(k):
    """Turn underscores into dashes and capitalize each dash-separated word."""
    words = k.replace('_', '-').split('-')
    return '-'.join(word.capitalize() for word in words)
109

    
110
def get_header_prefix(request, prefix):
    """Collect the request headers whose name starts with 'prefix' into a dict.

    'prefix' is matched against request.META in its 'HTTP_*' form. Keys of
    the result are rebuilt with format_header_key() from the META name minus
    its leading 'HTTP_'. A header whose name is exactly the prefix is skipped.
    """

    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for name, value in request.META.items():
        if not name.startswith(meta_prefix) or len(name) <= len(meta_prefix):
            continue
        # TODO: Document or remove '~' replacing.
        headers[format_header_key(name[5:])] = value.replace('~', '')
    return headers
116

    
117
def get_account_headers(request):
    """Read account metadata and group definitions from the request headers.

    Returns a (meta, groups) tuple: 'meta' holds the X-Account-Meta-* headers
    and 'groups' maps each lower-cased group name to the list of non-empty,
    comma-separated members given in its X-Account-Group-* header.

    Raises BadRequest if a group name contains '-' or '_'.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    groups = {}
    skip = len('X-Account-Group-')
    for header, value in get_header_prefix(request, 'X-Account-Group-').items():
        name = header[skip:].lower()
        if '-' in name or '_' in name:
            raise BadRequest('Bad characters in group name')
        members = value.replace(' ', '').split(',')
        groups[name] = [m for m in members if m != '']
    return meta, groups
128

    
129
def put_account_headers(response, meta, groups, policy):
    """Fill the response headers with account information.

    Sets the container count and bytes used (when present in 'meta'),
    Last-Modified, every X-Account-Meta-* entry, the optional until
    timestamp, one X-Account-Group-* header per group and one
    X-Account-Policy-* header per policy entry.
    """
    for key, header in (('count', 'X-Account-Container-Count'),
                        ('bytes', 'X-Account-Bytes-Used')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))

    meta_keys = [x for x in meta.keys() if x.startswith('X-Account-Meta-')]
    for key in meta_keys:
        name = smart_str(key, strings_only=True)
        response[name] = smart_str(meta[key], strings_only=True)

    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))

    for group, members in groups.items():
        group = smart_str(group, strings_only=True)
        header = format_header_key('X-Account-Group-' + group)
        response[header] = smart_str(','.join(members), strings_only=True)

    for key, value in policy.items():
        header = format_header_key('X-Account-Policy-' + key)
        response[smart_str(header, strings_only=True)] = smart_str(value, strings_only=True)
146

    
147
def get_container_headers(request):
    """Read container metadata and policy from the request headers.

    Returns a (meta, policy) tuple. Policy keys are the lower-cased part of
    the header name after 'X-Container-Policy-'; spaces are removed from the
    policy values.
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    skip = len('X-Container-Policy-')
    policy = {}
    for header, value in get_header_prefix(request, 'X-Container-Policy-').items():
        policy[header[skip:].lower()] = value.replace(' ', '')
    return meta, policy
151

    
152
def put_container_headers(request, response, meta, policy):
    """Fill the response headers with container information.

    Besides the values taken from 'meta' and 'policy', the block size and
    hash algorithm of request.backend are reported, and the names found in
    meta['object_meta'] are listed (without their 'X-Object-Meta-' prefix)
    in X-Container-Object-Meta.
    """
    for key, header in (('count', 'X-Container-Object-Count'),
                        ('bytes', 'X-Container-Bytes-Used')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))

    meta_keys = [x for x in meta.keys() if x.startswith('X-Container-Meta-')]
    for key in meta_keys:
        name = smart_str(key, strings_only=True)
        response[name] = smart_str(meta[key], strings_only=True)

    skip = len('X-Object-Meta-')
    object_meta = [smart_str(x, strings_only=True)[skip:]
                   for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(object_meta)

    backend = request.backend
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm

    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))

    for key, value in policy.items():
        header = format_header_key('X-Container-Policy-' + key)
        response[smart_str(header, strings_only=True)] = smart_str(value, strings_only=True)
168

    
169
def get_object_headers(request):
    """Read object metadata, sharing and public settings from the request.

    Returns a (meta, permissions, public) tuple. 'meta' holds the
    X-Object-Meta-* headers plus any non-empty Content-Type,
    Content-Encoding, Content-Disposition and X-Object-Manifest values.
    """
    meta = get_header_prefix(request, 'X-Object-Meta-')
    passthrough = (
        ('CONTENT_TYPE', 'Content-Type'),
        ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
        ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'),
    )
    for environ_key, header in passthrough:
        value = request.META.get(environ_key)
        if value:
            meta[header] = value
    return meta, get_sharing(request), get_public(request)
180

    
181
def put_object_headers(response, meta, restricted=False):
    """Fill the response headers with object information.

    ETag (if known), Content-Length, Content-Type and Last-Modified are
    always set. When 'restricted' is true only Content-Encoding and
    Content-Disposition are added; otherwise the hash, UUID, modifier,
    version details, all X-Object-Meta-* entries and the manifest, sharing
    and public headers found in 'meta' are included as well.
    """
    if 'ETag' in meta:
        response['ETag'] = meta['ETag']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))

    if restricted:
        optional = ('Content-Encoding', 'Content-Disposition')
    else:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        meta_keys = [x for x in meta.keys() if x.startswith('X-Object-Meta-')]
        for key in meta_keys:
            response[smart_str(key, strings_only=True)] = smart_str(meta[key], strings_only=True)
        optional = ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
                    'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                    'X-Object-Public')

    for key in optional:
        if key in meta:
            response[key] = smart_str(meta[key], strings_only=True)
204

    
205
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The manifest value is read as '<container>/<name prefix>'. The objects
    listed under it are looked up through request.backend; meta['bytes']
    becomes the sum of their sizes and meta['ETag'] the MD5 of their
    concatenated ETags. This is best effort: if anything fails while the
    parts are collected, 'meta' is left untouched.
    """

    if 'X-Object-Manifest' not in meta:
        return

    etags = []
    total = 0
    try:
        src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(request.user_uniq, v_account,
                            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(request.user_uniq,
                                    v_account, src_container, x[0], 'pithos', x[1])
            if 'ETag' in src_meta:
                etags.append(src_meta['ETag'])
            total += src_meta['bytes']
    except Exception:
        # Errors are deliberately ignored, but leave a trace for debugging.
        # Exception (not a bare except) lets SystemExit/KeyboardInterrupt
        # propagate.
        logger.debug('Could not compute manifest metadata', exc_info=True)
        return

    meta['bytes'] = total
    md5 = hashlib.md5()
    md5.update(''.join(etags))
    meta['ETag'] = md5.hexdigest().lower()
228

    
229
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add the sharing headers described by 'permissions' to 'meta'.

    'permissions' is None or an (allowed, perm_path, perms) tuple. Nothing
    is done for None or empty 'perms'. Otherwise X-Object-Sharing is built
    from the 'read' and 'write' lists, X-Object-Shared-By is set when the
    permissions come from a path other than the object's own, and
    X-Object-Allowed-To is set when the requesting user is not the account.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if not len(perms):
        return

    clauses = []
    for kind in ('read', 'write'):
        members = ','.join(perms.get(kind, []))
        if members:
            clauses.append(kind + '=' + members)
    meta['X-Object-Sharing'] = '; '.join(clauses)

    object_path = '/'.join((v_account, v_container, v_object))
    if object_path != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
247

    
248
def update_public_meta(public, meta):
    """Set X-Object-Public in 'meta' to the '/public/' URL built from 'public'.

    Nothing is done when 'public' is falsy.
    """
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
252

    
253
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.

    Raises NotModified when If-Modified-Since is not older than the
    resource, and PreconditionFailed when If-Unmodified-Since is older than
    it. Headers that are missing, or that parse_http_date_safe() turns into
    None, are ignored.
    """

    if 'modified' not in meta:
        return # TODO: Always return?

    def header_time(name):
        # Parsed value of the named date header, or None when it is absent.
        raw = request.META.get(name)
        if raw is None:
            return None
        return parse_http_date_safe(raw)

    if_modified_since = header_time('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
        raise NotModified('Resource has not been modified')

    if_unmodified_since = header_time('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
        raise PreconditionFailed('Resource has been modified')
270

    
271
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    If-Match fails with PreconditionFailed when the resource has no ETag or
    the ETag is not among the listed ones ('*' accepts any). If-None-Match
    raises NotModified (HEAD/GET) or PreconditionFailed (other methods) when
    the resource has an ETag and the header is '*' or lists it.
    """

    etag = meta.get('ETag', None)
    headers = request.META

    if_match = headers.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            accepted = [x.lower() for x in parse_etags(if_match)]
            if etag not in accepted:
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = headers.get('HTTP_IF_NONE_MATCH')
    # TODO: If this passes, must ignore If-Modified-Since header.
    if if_none_match is None or etag is None:
        return
    if if_none_match != '*':
        rejected = [x.lower() for x in parse_etags(if_none_match)]
        if etag not in rejected:
            return
    # TODO: Continue if an If-Modified-Since header is present.
    if request.method in ('HEAD', 'GET'):
        raise NotModified('Resource ETag matches')
    raise PreconditionFailed('Resource exists or ETag matches')
292

    
293
def split_container_object_string(s):
    """Split '/<container>/<object>' into a (container, object) tuple.

    The object part may itself contain slashes. Raises ValueError when 's'
    does not start with '/', has no second '/', or has nothing after it.
    """
    if s[:1] != '/':
        raise ValueError
    container, separator, name = s[1:].partition('/')
    if separator == '' or name == '':
        raise ValueError
    return container, name
301

    
302
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object.

    Metadata, permissions and the public flag for the destination are read
    from the request headers. The X-Source-Version header is passed on for
    copies only. Backend errors are translated to API faults. Returns the
    version id reported by the backend.
    """

    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del request.META['CONTENT_TYPE']
    meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')

    try:
        shared_args = (request.user_uniq, src_account, src_container, src_name,
                       dest_account, dest_container, dest_name,
                       'pithos', meta, False, permissions)
        if move:
            version_id = request.backend.move_object(*shared_args)
        else:
            version_id = request.backend.copy_object(*(shared_args + (src_version,)))
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except AttributeError as e:
        raise Conflict('\n'.join(e.data) + '\n')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')

    if public is None:
        return version_id
    try:
        request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except NameError:
        raise ItemNotFound('Object does not exist')
    return version_id
336

    
337
def get_int_parameter(p):
    """Convert 'p' to a non-negative int.

    Returns None when 'p' is None, cannot be parsed by int() or is negative.
    """
    if p is None:
        return p
    try:
        value = int(p)
    except ValueError:
        return None
    if value < 0:
        return None
    return value
346

    
347
def get_content_length(request):
    """Return the request's Content-Length as a non-negative int.

    Raises LengthRequired when the header is missing or not a valid length.
    """
    raw = request.META.get('CONTENT_LENGTH')
    content_length = get_int_parameter(raw)
    if content_length is not None:
        return content_length
    raise LengthRequired('Missing or invalid Content-Length header')
352

    
353
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    'size' is the total length of the data. A range whose last byte lies
    past the end of the data is cut at the end, and a suffix range longer
    than the data covers all of it. A range that starts at or past the end
    is returned unclamped, so the caller can still reject it.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compiled once for all the range specs in the header.
    spec = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        m = spec.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset == '':
            # Suffix range ('-N'): the last N bytes, at most the whole data.
            length = min(int(upto), size)
            ret.append((size - length, length))
            continue

        offset = int(offset)
        if upto == '':
            # Open range ('N-'): from the offset to the end.
            ret.append((offset, size - offset))
            continue

        upto = int(upto)
        if offset > upto:
            return None
        if offset < size:
            # The last position may not exceed the final byte of the data.
            upto = min(upto, size - 1)
        ret.append((offset, upto - offset + 1))

    return ret
389

    
390
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Returns None when the header is missing, malformed or inconsistent
    (first byte after last byte, or positions not below the total).
    Returns (None, None, None) for 'bytes */*'. Otherwise returns an
    (offset, length, total) tuple in which 'length' is None when no last
    byte was given and 'total' is None when it was given as '*'.
    """

    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None

    m = re.match(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if m is None:
        if header == 'bytes */*':
            return (None, None, None)
        return None

    offset = int(m.group('offset'))
    upto = int(m.group('upto')) if m.group('upto') != '' else None
    total = int(m.group('total')) if m.group('total') != '*' else None

    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
429

    
430
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Returns None when the header is absent and an empty dict when it is
    blank. Otherwise returns a dict with 'read' and/or 'write' lists of
    lower-cased names; a list containing '*' is reduced to ['*'], and a name
    present in both lists is kept only under 'write'.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '').replace(' ', '')

    ret = {}
    if permissions == '':
        return ret

    for perm in permissions.split(';'):
        for kind in ('read', 'write'):
            prefix = kind + '='
            if perm.startswith(prefix):
                break
        else:
            raise BadRequest('Bad X-Object-Sharing header value')

        members = list(set([v.replace(' ','').lower() for v in perm[len(prefix):].split(',')]))
        if '' in members:
            members.remove('')
        if '*' in members:
            members = ['*']
        if len(members) == 0:
            raise BadRequest('Bad X-Object-Sharing header value')
        ret[kind] = members

    # Keep duplicates only in write list.
    writers = ret.get('write', [])
    dups = [x for x in ret.get('read', []) if x in writers and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del ret['read']

    return ret
476

    
477
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None when the header is absent, True for 'true' and False for
    'false' or a blank value (spaces and letter case are ignored).

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    flag = public.replace(' ', '').lower()
    if flag == 'true':
        return True
    if flag in ('false', ''):
        return False
    raise BadRequest('Bad X-Object-Public header value')
493

    
494
def raw_input_socket(request):
    """Return the socket for reading the rest of the request.

    Under mod_python this is request._req; otherwise it is the 'wsgi.input'
    entry of request.environ. Raises NotImplemented (the API fault) when
    neither is available.
    """

    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    environ = request.environ
    if 'wsgi.input' not in environ:
        raise NotImplemented('Unknown server software')
    return environ['wsgi.input']
503

    
504
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB

def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Raises BadRequest when the size limit is reached, when a chunk size
    cannot be parsed, or when the input ends before the announced data has
    been received.
    """

    sock = raw_input_socket(request)
    if length < 0: # Chunked transfers
        # 'length' only flags chunked mode; the bytes actually received are
        # counted separately so that the size limit is really enforced.
        received = 0

        # Small version (server does the dechunking).
        server_software = request.META.get('SERVER_SOFTWARE') or ''
        if request.environ.get('mod_wsgi.input_chunked', None) or server_software.startswith('gunicorn'):
            while received < MAX_UPLOAD_SIZE:
                data = sock.read(blocksize)
                if not data:
                    return
                received += len(data)
                yield data
            raise BadRequest('Maximum size is reached')

        # Long version (do the dechunking).
        data = ''
        while received < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    char = sock.read(1)
                    if not char:
                        # End of input: stop instead of spinning forever.
                        break
                    chunk_length += char
            # Drop any chunk extension and the surrounding whitespace/CRLF.
            chunk_length = chunk_length.split(';', 1)[0].strip()
            try:
                chunk_length = int(chunk_length, 16)
            except (TypeError, ValueError):
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            if chunk_length < 0:
                raise BadRequest('Bad chunk size')
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                if not chunk:
                    # The input ended in the middle of a chunk.
                    raise BadRequest('Incomplete chunked request body')
                chunk_length -= len(chunk)
                received += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        raise BadRequest('Maximum size is reached')
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
569

    
570
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Incoming data is cut into blocks of the backend's block size, each block
    is stored with backend.put_block() and its hash appended to the file's
    'hashmap'. The MD5 of the whole content is kept in the file's 'etag'.
    """

    def __init__(self, request=None):
        # NOTE: despite the default, a request carrying a 'backend' attribute
        # is required.
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store the first 'length' buffered bytes as one block, if available."""
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the buffer, the digest and the UploadedFile for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer 'raw_data' and store every complete block it makes available.

        Returns None, so the data is not passed on to other handlers.
        """
        self.data += raw_data
        self.file.size += len(raw_data)
        block_size = self.backend.block_size
        # Drain all complete blocks: a chunk larger than the block size must
        # not leave a backlog to be written later as one oversized block.
        while block_size > 0 and len(self.data) >= block_size:
            self.put_data(block_size)
        return None

    def file_complete(self, file_size):
        """Store the remaining partial block and return the finished file."""
        remaining = len(self.data)
        if remaining > 0:
            self.put_data(remaining)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
603

    
604
class ObjectWrapper(object):
605
    """Return the object's data block-per-block in each iteration.
606
    
607
    Read from the object using the offset and length provided in each entry of the range list.
608
    """
609
    
610
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
611
        self.backend = backend
612
        self.ranges = ranges
613
        self.sizes = sizes
614
        self.hashmaps = hashmaps
615
        self.boundary = boundary
616
        self.size = sum(self.sizes)
617
        
618
        self.file_index = 0
619
        self.block_index = 0
620
        self.block_hash = -1
621
        self.block = ''
622
        
623
        self.range_index = -1
624
        self.offset, self.length = self.ranges[0]
625
    
626
    def __iter__(self):
627
        return self
628
    
629
    def part_iterator(self):
630
        if self.length > 0:
631
            # Get the file for the current offset.
632
            file_size = self.sizes[self.file_index]
633
            while self.offset >= file_size:
634
                self.offset -= file_size
635
                self.file_index += 1
636
                file_size = self.sizes[self.file_index]
637
            
638
            # Get the block for the current position.
639
            self.block_index = int(self.offset / self.backend.block_size)
640
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
641
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
642
                try:
643
                    self.block = self.backend.get_block(self.block_hash)
644
                except NameError:
645
                    raise ItemNotFound('Block does not exist')
646
            
647
            # Get the data from the block.
648
            bo = self.offset % self.backend.block_size
649
            bl = min(self.length, len(self.block) - bo)
650
            data = self.block[bo:bo + bl]
651
            self.offset += bl
652
            self.length -= bl
653
            return data
654
        else:
655
            raise StopIteration
656
    
657
    def next(self):
658
        if len(self.ranges) == 1:
659
            return self.part_iterator()
660
        if self.range_index == len(self.ranges):
661
            raise StopIteration
662
        try:
663
            if self.range_index == -1:
664
                raise StopIteration
665
            return self.part_iterator()
666
        except StopIteration:
667
            self.range_index += 1
668
            out = []
669
            if self.range_index < len(self.ranges):
670
                # Part header.
671
                self.offset, self.length = self.ranges[self.range_index]
672
                self.file_index = 0
673
                if self.range_index > 0:
674
                    out.append('')
675
                out.append('--' + self.boundary)
676
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
677
                out.append('Content-Transfer-Encoding: binary')
678
                out.append('')
679
                out.append('')
680
                return '\r\n'.join(out)
681
            else:
682
                # Footer.
683
                out.append('')
684
                out.append('--' + self.boundary + '--')
685
                out.append('')
686
                return '\r\n'.join(out)
687

    
688
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data.

    Without a usable Range header the whole object is sent with status 200.
    With one, the requested ranges are sent with status 206, unless an
    If-Range header shows that the client's copy is outdated, in which case
    the whole object is sent. 'public' is passed to put_object_headers() as
    its 'restricted' flag.

    Raises RangeNotSatisfiable when a requested range does not fit the object.
    """

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        unsatisfiable = any(
            length <= 0 or length > size or
            offset < 0 or offset >= size or
            offset + length > size
            for offset, length in ranges)
        if unsatisfiable:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                last_modified = parse_http_date(if_range)
            except ValueError:
                # Not a date, so the header carries an entity tag.
                last_modified = None
            if last_modified is not None:
                # Compare as whole seconds, like the Last-Modified header.
                unchanged = last_modified == int(meta['modified'])
            else:
                # Entity tags arrive quoted; the stored ETag is bare.
                etag = if_range.strip()
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    etag = etag[1:-1]
                unchanged = etag == meta.get('ETag')
            if not unchanged:
                ranges = [(0, size)]
                ret = 200

    if ret == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    else:
        boundary = ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response
734

    
735
def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset.

    At most one block's worth of 'data' is written, never crossing a
    block boundary. 'hashmap' is modified in place: if the target block
    index already exists its entry is replaced by the result of
    backend.update_block(); otherwise a new block, zero-filled up to the
    in-block offset, is stored with backend.put_block() and its hash is
    appended.

    Returns the number of bytes of 'data' that were written.
    """

    block_size = request.backend.block_size
    # divmod() floors exactly for arbitrarily large offsets; the former
    # int(offset / block_size) goes through a float under true division.
    block_index, block_offset = divmod(offset, block_size)
    written = min(len(data), block_size - block_offset)
    chunk = data[:written]
    if block_index < len(hashmap):
        hashmap[block_index] = request.backend.update_block(
            hashmap[block_index], chunk, block_offset)
    else:
        # NOTE(review): the new hash is appended, so this presumably
        # expects block_index == len(hashmap) - confirm against callers.
        hashmap.append(request.backend.put_block(('\x00' * block_offset) + chunk))
    return written  # Return amount of data written.
746

    
747
def hashmap_md5(request, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    Each block is fetched with backend.get_block() and, when it comes
    back shorter than expected, padded with zero bytes before being fed
    to the digest. Every block but the last is expected to be
    backend.block_size long; the last one is expected to hold whatever
    remains of 'size'.

    Returns the lowercase hexadecimal digest.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    block_size = request.backend.block_size
    last_index = len(hashmap) - 1
    for index, block_hash in enumerate(hashmap):
        data = request.backend.get_block(block_hash)
        if index == last_index:
            # Use the remainder of 'size' rather than size % block_size:
            # the modulo is 0 when the object ends exactly on a block
            # boundary, which would skip padding a short last block.
            expected = size - block_size * last_index
        else:
            expected = block_size
        pad = max(expected - len(data), 0)
        md5.update(data + (b'\x00' * pad))
    return md5.hexdigest().lower()
760

    
761
def get_backend():
    """Connect to the backend described by the BACKEND_* settings.

    The returned backend has the 'quota' and 'versioning' entries of its
    default policy set from settings.BACKEND_QUOTA and
    settings.BACKEND_VERSIONING.
    """

    options = {
        'db_module': settings.BACKEND_DB_MODULE,
        'db_connection': settings.BACKEND_DB_CONNECTION,
        'block_module': settings.BACKEND_BLOCK_MODULE,
        'block_path': settings.BACKEND_BLOCK_PATH,
    }
    connection = connect_backend(**options)
    connection.default_policy['quota'] = settings.BACKEND_QUOTA
    connection.default_policy['versioning'] = settings.BACKEND_VERSIONING
    return connection
769

    
770
def update_request_headers(request):
    """Validate the HTTP_* entries of request.META and URL-decode them.

    Raises BadRequest when there are more than 90 such headers, when a
    name exceeds 128 or a value exceeds 256 characters, or when a name
    or value is not plain ASCII. Entries containing '%' in their name or
    value are replaced in request.META by their unquoted form.
    """

    # Work on a snapshot so that request.META can be modified in the loop.
    headers = dict((name, value)
                   for name, value in request.META.iteritems()
                   if name.startswith('HTTP_'))
    if len(headers) > 90:
        raise BadRequest('Too many headers.')

    for name, value in headers.iteritems():
        if len(name) > 128:
            raise BadRequest('Header name too large.')
        if len(value) > 256:
            raise BadRequest('Header value too large.')
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')

        # Handle URL-encoded keys and values.
        if '%' not in name and '%' not in value:
            continue
        del request.META[name]
        request.META[unquote(name)] = smart_unicode(unquote(value), strings_only=True)
789

    
790
def update_response_headers(request, response):
    """Finalize the headers of an outgoing response, in place.

    Sets Content-Type according to request.serialization (falling back
    to text/plain when none is set), fills in Content-Length unless it
    is already present or the reply is multipart/byteranges, URL-encodes
    the X-Account-*, X-Container-*, X-Object-* and Content-* headers,
    and sets the Date header when settings.TEST is on.
    """

    serialized_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    if request.serialization in serialized_types:
        response['Content-Type'] = serialized_types[request.serialization]
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if not response.has_header('Content-Length'):
        multipart = (response.has_header('Content-Type') and
                     response['Content-Type'].startswith('multipart/byteranges'))
        if not multipart:
            response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    encoded_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    for name, value in response.items():
        if name.startswith(encoded_prefixes):
            del response[name]
            response[quote(name)] = quote(value, safe='/=,:@; ')

    if settings.TEST:
        response['Date'] = format_date_time(time())
813

    
814
def render_fault(request, fault):
    """Render a fault as a plain-text HttpResponse.

    The body is the fault's message followed, when present, by its
    details; the status is the fault's code. For an InternalServerError,
    when settings.DEBUG or settings.TEST is on, the details are set to
    the traceback of the exception currently being handled.

    As a side effect request.serialization is forced to 'text'.
    """

    if isinstance(fault, InternalServerError) and (settings.DEBUG or settings.TEST):
        # format_exc() takes an optional traceback depth limit, not an
        # exception; it always formats the exception being handled.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
825

    
826
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested: 'text', 'json' or 'xml'.

    Unless 'format_allowed' is True the answer is always 'text'.
    Otherwise the 'format' query parameter is consulted first, then the
    entries of the Accept header in the order they are listed; 'text' is
    the fallback when neither names a supported format.
    """

    if format_allowed:
        requested = request.GET.get('format')
        for name in ('json', 'xml'):
            if requested == name:
                return name

        accept_header = request.META.get('HTTP_ACCEPT', '')
        for entry in accept_header.split(','):
            # Drop any parameters (e.g. ';q=0.9') after the media type.
            media_type = entry.strip().split(';', 1)[0]
            if media_type == 'application/json':
                return 'json'
            if media_type in ('application/xml', 'text/xml'):
                return 'xml'

    return 'text'
849

    
850
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method.

    The wrapped view is only called after the request has passed the
    method, user, name-length and header checks, and after
    request.serialization and request.backend have been filled in. Any
    Fault raised is rendered as the reply; any other exception is logged
    and rendered as an InternalServerError. The backend, if one was
    attached to the request, is always closed.
    """

    # (index into args, maximum length, error message). The args variable
    # may contain up to (account, container, object).
    name_limits = (
        (1, 256, 'Container name too large.'),
        (2, 1024, 'Object name too large.'),
    )

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')

                for position, limit, message in name_limits:
                    if len(args) > position and len(args[position]) > limit:
                        raise BadRequest(message)

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except BaseException as e:
                logger.exception('Unexpected error: %s' % e)
                return render_fault(request, InternalServerError('Unexpected error'))
            finally:
                backend = getattr(request, 'backend', None)
                if backend is not None:
                    backend.close()
        return wrapper
    return decorator