Statistics
| Branch: | Tag: | Revision:

root / pithos / api / util.py @ af7bb62f

History | View | Annotate | Download (34.4 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from pithos.lib.compat import parse_http_date_safe, parse_http_date
52

    
53
from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
54
                                Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
55
                                RangeNotSatisfiable, InternalServerError, NotImplemented)
56
from pithos.api.short_url import encode_url
57
from pithos.backends import connect_backend
58
from pithos.backends.base import NotAllowedError, QuotaError
59

    
60
import logging
61
import re
62
import hashlib
63
import uuid
64
import decimal
65

    
66

    
67
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
68

    
69

    
70
class UTC(tzinfo):
    """Fixed-offset tzinfo implementation representing UTC."""

    def utcoffset(self, dt):
        """Return a zero offset for any datetime."""
        no_offset = timedelta(0)
        return no_offset

    def tzname(self, dt):
        """Return the name of the timezone."""
        return 'UTC'

    def dst(self, dt):
        """Return a zero daylight-saving adjustment for any datetime."""
        no_shift = timedelta(0)
        return no_shift
79

    
80
def json_encode_decimal(obj):
    """Return the string form of a Decimal.

    Raises TypeError for an object of any other type.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
84

    
85
def isoformat(d):
    """Format datetime d as an ISO8601 string carrying an explicit UTC offset.

    The timezone of d is replaced with UTC (its fields are not converted).
    """
    stamped = d.replace(tzinfo=UTC())
    return stamped.isoformat()
89

    
90
def rename_meta_key(d, old, new):
    """Rename key 'old' to 'new' in dictionary d, in place.

    Does nothing when 'old' is not present. A value already stored under
    'new' is overwritten.
    """
    if old not in d:
        return
    # Pop first, then assign: with old == new this keeps the entry, whereas
    # assigning and then deleting 'old' would drop it altogether.
    d[new] = d.pop(old)
95

    
96
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.

    Note that the 'last_modified' entry of the passed dictionary is
    replaced in place with its formatted value; the returned dictionary
    is a new object.
    """

    if 'last_modified' in d:
        # isoformat() labels the value as UTC, so the timestamp has to be
        # broken down as UTC too; fromtimestamp() would give local time.
        d['last_modified'] = isoformat(datetime.utcfromtimestamp(d['last_modified']))
    return dict((k.lower().replace('-', '_'), v) for k, v in d.items())
106

    
107
def format_header_key(k):
    """Turn underscores into dashes and capitalize every dash-separated part."""
    pieces = k.replace('_', '-').split('-')
    return '-'.join(piece.capitalize() for piece in pieces)
110

    
111
def get_header_prefix(request, prefix):
    """Collect the request headers whose name starts with prefix.

    Returns a dict keyed by the header name as reformatted by
    format_header_key(); a header named exactly like the prefix is skipped.
    """

    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for name, value in request.META.items():
        if not name.startswith(meta_prefix) or len(name) <= len(meta_prefix):
            continue
        # name[5:] drops the leading 'HTTP_'.
        # TODO: Document or remove '~' replacing.
        headers[format_header_key(name[5:])] = value.replace('~', '')
    return headers
117

    
118
def get_account_headers(request):
    """Extract account metadata and group definitions from the request headers.

    Returns a (meta, groups) tuple; groups maps each lower-cased group name
    to the list of its non-empty, comma-separated members.
    Raises BadRequest if a group name contains '-' or '_'.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    groups = {}
    for header, value in get_header_prefix(request, 'X-Account-Group-').items():
        # 16 == len('X-Account-Group-')
        group = header[16:].lower()
        if '-' in group or '_' in group:
            raise BadRequest('Bad characters in group name')
        members = value.replace(' ', '').split(',')
        groups[group] = [member for member in members if member != '']
    return meta, groups
129

    
130
def put_account_headers(response, meta, groups, policy):
    """Set the account-level headers of the response.

    Copies counters, modification timestamps and 'X-Account-Meta-*' entries
    from meta, and emits one 'X-Account-Group-*' header per group and one
    'X-Account-Policy-*' header per policy entry.
    """
    counters = (('count', 'X-Account-Container-Count'),
                ('bytes', 'X-Account-Bytes-Used'))
    for meta_key, header in counters:
        if meta_key in meta:
            response[header] = meta[meta_key]
    response['Last-Modified'] = http_date(int(meta['modified']))
    for name in meta.keys():
        if name.startswith('X-Account-Meta-'):
            response[smart_str(name, strings_only=True)] = smart_str(meta[name], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for group, members in groups.items():
        header = format_header_key('X-Account-Group-' + smart_str(group, strings_only=True))
        response[header] = smart_str(','.join(members), strings_only=True)
    for name, value in policy.items():
        header = smart_str(format_header_key('X-Account-Policy-' + name), strings_only=True)
        response[header] = smart_str(value, strings_only=True)
147

    
148
def get_container_headers(request):
    """Extract container metadata and policy from the request headers.

    Returns a (meta, policy) tuple; policy maps lower-cased policy names
    to their values with all spaces removed.
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    policy = {}
    for header, value in get_header_prefix(request, 'X-Container-Policy-').items():
        # 19 == len('X-Container-Policy-')
        policy[header[19:].lower()] = value.replace(' ', '')
    return meta, policy
152

    
153
def put_container_headers(request, response, meta, policy):
    """Set the container-level headers of the response.

    Besides the values taken from meta and policy, the block size and hash
    algorithm of the request's backend are reported.
    """
    counters = (('count', 'X-Container-Object-Count'),
                ('bytes', 'X-Container-Bytes-Used'))
    for meta_key, header in counters:
        if meta_key in meta:
            response[header] = meta[meta_key]
    response['Last-Modified'] = http_date(int(meta['modified']))
    for name in meta.keys():
        if name.startswith('X-Container-Meta-'):
            response[smart_str(name, strings_only=True)] = smart_str(meta[name], strings_only=True)
    object_meta_keys = []
    for name in meta['object_meta']:
        if name.startswith('X-Object-Meta-'):
            # 14 == len('X-Object-Meta-'); only the user part is listed.
            object_meta_keys.append(smart_str(name, strings_only=True)[14:])
    response['X-Container-Object-Meta'] = ','.join(object_meta_keys)
    backend = request.backend
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for name, value in policy.items():
        header = smart_str(format_header_key('X-Container-Policy-' + name), strings_only=True)
        response[header] = smart_str(value, strings_only=True)
169

    
170
def get_object_headers(request):
    """Extract object metadata, sharing and public settings from the request.

    Returns a (meta, permissions, public) tuple. Besides 'X-Object-Meta-*'
    headers, meta receives the content type, encoding, disposition and
    manifest headers when they are present and non-empty.
    """
    meta = get_header_prefix(request, 'X-Object-Meta-')
    standard_headers = (
        ('CONTENT_TYPE', 'Content-Type'),
        ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
        ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'),
    )
    for environ_key, header in standard_headers:
        value = request.META.get(environ_key)
        if value:
            meta[header] = value
    return meta, get_sharing(request), get_public(request)
181

    
182
def put_object_headers(response, meta, restricted=False):
    """Set the object-level headers of the response.

    When restricted is true only the ETag, length, type, modification date,
    encoding and disposition are exposed; otherwise hash, UUID, version,
    user metadata, manifest, sharing and public headers are added as well.
    """
    if 'ETag' in meta:
        response['ETag'] = meta['ETag']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))

    if restricted:
        optional = ('Content-Encoding', 'Content-Disposition')
    else:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        for name in meta.keys():
            if name.startswith('X-Object-Meta-'):
                response[smart_str(name, strings_only=True)] = smart_str(meta[name], strings_only=True)
        optional = ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
                    'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                    'X-Object-Public')

    for name in optional:
        if name in meta:
            response[name] = smart_str(meta[name], strings_only=True)
205

    
206
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The objects under the manifest's 'container/prefix' are listed and
    meta['bytes'] is set to the sum of their sizes, meta['ETag'] to the MD5
    of their concatenated ETags. This is best effort: on any error meta is
    left untouched.
    """

    if 'X-Object-Manifest' not in meta:
        return

    etags = []
    total_bytes = 0
    try:
        src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(request.user_uniq, v_account,
                            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(request.user_uniq,
                                    v_account, src_container, x[0], 'pithos', x[1])
            if 'ETag' in src_meta:
                etags.append(src_meta['ETag'])
            total_bytes += src_meta['bytes']
    except Exception:
        # Ignore errors, but let KeyboardInterrupt/SystemExit through.
        return
    meta['bytes'] = total_bytes
    md5 = hashlib.md5()
    md5.update(''.join(etags))
    meta['ETag'] = md5.hexdigest().lower()
229

    
230
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add the sharing headers described by permissions to meta.

    permissions is None or an (allowed, perm_path, perms) tuple. Nothing is
    changed when it is None or perms is empty. 'X-Object-Shared-By' is set
    when the permissions come from a path other than the object itself and
    'X-Object-Allowed-To' when the requesting user is not the account owner.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return

    clauses = []
    for access in ('read', 'write'):
        users = ','.join(perms.get(access, []))
        if users:
            clauses.append(access + '=' + users)
    meta['X-Object-Sharing'] = '; '.join(clauses)

    object_path = '/'.join((v_account, v_container, v_object))
    if object_path != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
248

    
249
def update_public_meta(public, meta):
    """Set 'X-Object-Public' in meta to the public URL path, if public is set."""
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
253

    
254
def validate_modification_preconditions(request, meta):
    """Check the modification time against the date preconditions of the request.

    Raises NotModified when If-Modified-Since is not older than the
    modification time, PreconditionFailed when If-Unmodified-Since is.
    Headers that cannot be parsed are ignored.
    """

    if 'modified' not in meta:
        return # TODO: Always return?

    since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if since is not None:
        since = parse_http_date_safe(since)
        if since is not None and int(meta['modified']) <= since:
            raise NotModified('Resource has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        unmodified_since = parse_http_date_safe(unmodified_since)
        if unmodified_since is not None and int(meta['modified']) > unmodified_since:
            raise PreconditionFailed('Resource has been modified')
271

    
272
def validate_matching_preconditions(request, meta):
    """Check the ETag against the If-Match / If-None-Match preconditions.

    Raises PreconditionFailed when If-Match is not satisfied. When
    If-None-Match is not satisfied, raises NotModified for HEAD and GET
    requests and PreconditionFailed for every other method.
    """

    etag = meta.get('ETag', None)

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            accepted = [x.lower() for x in parse_etags(if_match)]
            if etag not in accepted:
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    # TODO: If this passes, must ignore If-Modified-Since header.
    if if_none_match is None or etag is None:
        return
    if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
        # TODO: Continue if an If-Modified-Since header is present.
        if request.method in ('HEAD', 'GET'):
            raise NotModified('Resource ETag matches')
        raise PreconditionFailed('Resource exists or ETag matches')
293

    
294
def split_container_object_string(s):
    """Split a '/container/object' string into a (container, object) tuple.

    The split happens at the first slash after the leading one. Raises
    ValueError if s does not start with '/', has no second slash or has
    nothing after it.
    """
    if not s.startswith('/'):
        raise ValueError
    container, slash, name = s[1:].partition('/')
    if not slash or not name:
        raise ValueError
    return container, name
302

    
303
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object and return the version id given by the backend.

    Metadata, sharing and public settings are taken from the request
    headers. Backend errors are translated to the corresponding API faults.
    """

    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del(request.META['CONTENT_TYPE'])
    meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        args = (request.user_uniq, src_account, src_container, src_name,
                dest_account, dest_container, dest_name,
                'pithos', meta, False, permissions)
        if move:
            version_id = request.backend.move_object(*args)
        else:
            # Only a copy can address a specific source version.
            version_id = request.backend.copy_object(*(args + (src_version,)))
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except AttributeError as e:
        raise Conflict(simple_list_response(request, e.data))
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')

    if public is None:
        return version_id
    try:
        request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except NameError:
        raise ItemNotFound('Object does not exist')
    return version_id
337

    
338
def get_int_parameter(p):
    """Convert p to a non-negative integer.

    Returns None when p is None, is not a valid integer or is negative.
    """
    if p is None:
        return p
    try:
        value = int(p)
    except ValueError:
        return None
    return value if value >= 0 else None
347

    
348
def get_content_length(request):
    """Return the Content-Length of the request as a non-negative integer.

    Raises LengthRequired if the header is missing or invalid.
    """
    raw_length = request.META.get('CONTENT_LENGTH')
    content_length = get_int_parameter(raw_length)
    if content_length is not None:
        return content_length
    raise LengthRequired('Missing or invalid Content-Length header')
353

    
354
# A single byte-range-spec: 'N-M', 'N-' or '-N'.
_RANGE_SPEC_RE = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')


def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    Ranges reaching past the end of the object are cut at 'size': a suffix
    range longer than the object selects the whole object and a last byte
    position beyond the end is treated as the last byte. A range starting
    at or after 'size' yields a non-positive length, which the caller is
    expected to reject.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    ret = []
    for spec in ranges[6:].split(','):
        m = _RANGE_SPEC_RE.match(spec.strip())
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset == '':
            # Suffix range: the last 'upto' bytes, at most the whole object.
            length = min(int(upto), size)
            ret.append((size - length, length))
            continue

        offset = int(offset)
        if upto == '':
            # Open-ended range: everything from offset on.
            ret.append((offset, size - offset))
            continue

        upto = int(upto)
        if offset > upto:
            return None
        # A last byte position past the end means 'up to the last byte'.
        upto = min(upto, size - 1)
        ret.append((offset, upto - offset + 1))

    return ret
390

    
391
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Returns None when the header is missing, malformed or inconsistent.
    Otherwise returns an (offset, length, total) tuple, where length is None
    for an open-ended range and total is None when given as '*'.
    The special value 'bytes */*' gives (None, None, None).
    """

    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None
    if header == 'bytes */*':
        return (None, None, None)

    m = re.match(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if m is None:
        return None

    offset = int(m.group('offset'))
    upto = int(m.group('upto')) if m.group('upto') != '' else None
    total = int(m.group('total')) if m.group('total') != '*' else None

    # Reject ranges that are reversed or do not fit in the declared total.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
430

    
431
def _parse_sharing_users(value):
    """Parse the comma-separated user list of one sharing clause.

    Returns a list of unique, lower-cased entries, or ['*'] if '*' is among
    them. Raises BadRequest when no entry is left.
    """
    users = set(v.lower() for v in value.split(','))
    users.discard('')
    if '*' in users:
        return ['*']
    if not users:
        raise BadRequest('Bad X-Object-Sharing header value')
    return list(users)


def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Returns None when the header is missing, otherwise a dict with optional
    'read' and 'write' lists. An entry listed under both is kept only under
    'write' (except '*').

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '').replace(' ', '')

    ret = {}
    if permissions == '':
        return ret
    for perm in permissions.split(';'):
        if perm.startswith('read='):
            ret['read'] = _parse_sharing_users(perm[5:])
        elif perm.startswith('write='):
            ret['write'] = _parse_sharing_users(perm[6:])
        else:
            raise BadRequest('Bad X-Object-Sharing header value')

    # Keep duplicates only in write list.
    if 'read' in ret:
        writers = set(ret.get('write', []))
        readers = [x for x in ret['read'] if x == '*' or x not in writers]
        if readers:
            ret['read'] = readers
        else:
            del ret['read']

    return ret
477

    
478
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None when the header is missing, True for 'true' and False for
    'false' or an empty value (case and spaces are ignored).

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    normalized = public.replace(' ', '').lower()
    if normalized in ('false', ''):
        return False
    if normalized == 'true':
        return True
    raise BadRequest('Bad X-Object-Public header value')
494

    
495
def raw_input_socket(request):
    """Return the object to read the rest of the request body from.

    Under mod_python this is request._req, otherwise the 'wsgi.input'
    entry of the request environment. Raises NotImplemented when neither
    is available.
    """

    software = request.META.get('SERVER_SOFTWARE') or ''
    if software.startswith('mod_python'):
        return request._req
    environ = request.environ
    if 'wsgi.input' not in environ:
        raise NotImplemented('Unknown server software')
    return environ['wsgi.input']
504

    
505
# Upper bound, in bytes, on the data socket_read_iterator() reads from a request.
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB
506

    
507
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Raises BadRequest when the limit is exceeded, a chunk size is invalid
    or the input ends before all the announced data has been received.
    """

    sock = raw_input_socket(request)

    if length >= 0:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
        return

    # Chunked transfers. 'length' carries no size here, so the amount of
    # data is counted separately in order to enforce MAX_UPLOAD_SIZE.
    received = 0

    # Small version (server does the dechunking).
    server_software = request.META.get('SERVER_SOFTWARE') or ''
    if request.environ.get('mod_wsgi.input_chunked', None) or server_software.startswith('gunicorn'):
        while True:
            data = sock.read(blocksize)
            if not data:
                return
            received += len(data)
            if received > MAX_UPLOAD_SIZE:
                raise BadRequest('Maximum size is reached')
            yield data

    # Long version (do the dechunking).
    data = ''
    while True:
        # Get chunk size.
        if hasattr(sock, 'readline'):
            size_line = sock.readline()
        else:
            size_line = ''
            while size_line[-1:] != '\n':
                char = sock.read(1)
                if not char:
                    # End of input; whatever was read is validated below.
                    break
                size_line += char
        # Drop any chunk extension following the size.
        size_field = size_line.split(';', 1)[0].strip()
        try:
            chunk_length = int(size_field, 16)
        except (TypeError, ValueError):
            raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
        if chunk_length < 0:
            raise BadRequest('Bad chunk size')
        # Check if done.
        if chunk_length == 0:
            if len(data) > 0:
                yield data
            return
        received += chunk_length
        if received > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        # Get the actual data.
        while chunk_length > 0:
            chunk = sock.read(min(chunk_length, blocksize))
            if not chunk:
                # The input ended in the middle of a chunk.
                raise BadRequest('Incomplete chunk')
            chunk_length -= len(chunk)
            data += chunk
            if len(data) >= blocksize:
                ret = data[:blocksize]
                data = data[blocksize:]
                yield ret
        sock.read(2) # CRLF
570

    
571
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    The uploaded data is stored in the backend in blocks of the backend's
    block size. The resulting UploadedFile carries the list of block hashes
    as 'hashmap' and the MD5 of the data as 'etag'.
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store the first 'length' buffered bytes as a block, if available."""
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the digest, the buffer and the file object for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer the received data and store every complete block."""
        self.data += raw_data
        self.file.size += len(raw_data)
        block_size = self.backend.block_size
        # A chunk may hold more than one block; store all the complete
        # ones so that only a partial block remains for file_complete().
        while len(self.data) >= block_size:
            self.put_data(block_size)
        return None

    def file_complete(self, file_size):
        """Store the remaining data as the last block and return the file."""
        remaining = len(self.data)
        if remaining > 0:
            self.put_data(remaining)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
604

    
605
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.
    With more than one range, every range is preceded by a part header and
    a closing boundary line is returned at the end.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        # Position inside the list of files and the block held in memory.
        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        # -1 means that no part header has been returned yet.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of the current range.

        Raises StopIteration once the range has been read completely.
        """
        if self.length <= 0:
            raise StopIteration

        # Get the file for the current offset.
        file_size = self.sizes[self.file_index]
        while self.offset >= file_size:
            self.offset -= file_size
            self.file_index += 1
            file_size = self.sizes[self.file_index]

        # Get the block for the current position.
        self.block_index = int(self.offset // self.backend.block_size)
        if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
            self.block_hash = self.hashmaps[self.file_index][self.block_index]
            try:
                self.block = self.backend.get_block(self.block_hash)
            except NameError:
                raise ItemNotFound('Block does not exist')

        # Get the data from the block.
        bo = self.offset % self.backend.block_size
        bl = min(self.length, len(self.block) - bo)
        data = self.block[bo:bo + bl]
        self.offset += bl
        self.length -= bl
        return data

    def next(self):
        """Return the next piece of output: data, a part header or the footer."""
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                # Nothing returned yet; go straight to the first header.
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)

    # Name of the same method in the Python 3 iterator protocol.
    __next__ = next
688

    
689
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data.

    A usable Range header results in a 206 reply (multipart when more than
    one range is requested), unless an If-Range header is present that
    matches neither the modification time nor the ETag of the object, in
    which case the complete object is returned with 200.

    Raises RangeNotSatisfiable if a requested range does not fit the object.
    """

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        unsatisfiable = any(length <= 0 or length > size or
                            offset < 0 or offset >= size or
                            offset + length > size
                            for offset, length in ranges)
        if unsatisfiable:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # Modification time has passed instead.
                last_modified = parse_http_date(if_range)
            except ValueError:
                # Not a date, so the header holds an ETag.
                matches = if_range == meta.get('ETag')
            else:
                # Compare in whole seconds, as everywhere else in this module.
                matches = last_modified == int(meta['modified'])
            if not matches:
                ranges = [(0, size)]
                ret = 200

    if ret == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    else:
        boundary = ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response
735

    
736
def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset.

    Only the part of 'data' that fits in the block containing 'offset' is
    stored. An existing hashmap entry is updated in place; otherwise a new
    block is appended, zero-filled up to the position inside the block.
    Returns the number of items of 'data' that were written.
    """

    backend = request.backend
    block_size = backend.block_size
    index = int(offset / block_size)  # Block that contains the offset.
    start = offset % block_size       # Position inside that block.
    count = min(len(data), block_size - start)
    chunk = data[:count]

    if index >= len(hashmap):
        hashmap.append(backend.put_block(('\x00' * start) + chunk))
    else:
        hashmap[index] = backend.update_block(hashmap[index], chunk, start)
    return count  # Return amount of data written.
747

    
748
def hashmap_md5(request, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    Each block is fetched from request.backend and, when it comes back
    shorter than expected, is padded with zero bytes before being hashed.
    Every block except the last is expected to be a full block; the last one
    is expected to hold the remainder of 'size', which is a full block when
    'size' is an exact non-zero multiple of the block size.

    Returns the lowercase hexadecimal MD5 digest.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    block_size = request.backend.block_size
    last = len(hashmap) - 1
    for index, block_hash in enumerate(hashmap):
        data = request.backend.get_block(block_hash)
        expected = block_size
        if index == last:
            tail = size % block_size
            # A zero remainder means the last block is a full one (unless the
            # object is empty); using 0 here would skip the zero padding.
            if tail or not size:
                expected = tail
        pad = expected - min(len(data), expected)
        md5.update(data + (b'\x00' * pad))
    return md5.hexdigest().lower()
761

    
762
def simple_list_response(request, l):
    """Serialize the list 'l' in the format named by request.serialization.

    'text' gives one item per line (newline-terminated), 'xml' renders the
    'items.xml' template and 'json' dumps the list. Any other value yields
    None.
    """

    kind = request.serialization
    if kind == 'text':
        return '\n'.join(l) + '\n'
    elif kind == 'xml':
        return render_to_string('items.xml', {'items': l})
    elif kind == 'json':
        return json.dumps(l)
    return None
769

    
770
def get_backend():
    """Connect to the backend configured in settings and return it.

    The connection parameters and the default 'quota' and 'versioning'
    policy values are all read from the settings module.
    """

    connection_options = {
        'db_module': settings.BACKEND_DB_MODULE,
        'db_connection': settings.BACKEND_DB_CONNECTION,
        'block_module': settings.BACKEND_BLOCK_MODULE,
        'block_path': settings.BACKEND_BLOCK_PATH,
    }
    backend = connect_backend(**connection_options)

    policy = backend.default_policy
    policy['quota'] = settings.BACKEND_QUOTA
    policy['versioning'] = settings.BACKEND_VERSIONING
    return backend
778

    
779
def update_request_headers(request):
    """Validate the 'HTTP_*' entries of request.META and unquote them.

    Raises BadRequest when there are more than 90 such entries, when a name
    is longer than 128 or a value longer than 256, or when either cannot be
    decoded as ASCII. Entries that contain a '%' are replaced in
    request.META by their unquoted name and value.
    """

    headers = dict((name, value) for name, value in request.META.iteritems()
                   if name.startswith('HTTP_'))
    if len(headers) > 90:
        raise BadRequest('Too many headers.')

    for name, value in headers.iteritems():
        if len(name) > 128:
            raise BadRequest('Header name too large.')
        if len(value) > 256:
            raise BadRequest('Header value too large.')
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')

        if '%' not in name and '%' not in value:
            continue
        # Handle URL-encoded keys and values.
        del request.META[name]
        request.META[unquote(name)] = smart_unicode(unquote(value), strings_only=True)
798

    
799
def update_response_headers(request, response):
    """Finalize the headers of an outgoing response, in place.

    Sets 'Content-Type' from request.serialization (falling back to plain
    text when the response has none), fills in a missing 'Content-Length'
    unless the reply is 'multipart/byteranges', URL-encodes the account,
    container, object and content headers, and stamps a 'Date' header when
    settings.TEST is set.
    """

    serialized_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    content_type = serialized_types.get(request.serialization)
    if content_type is not None:
        response['Content-Type'] = content_type
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if not response.has_header('Content-Length'):
        multipart = (response.has_header('Content-Type') and
                     response['Content-Type'].startswith('multipart/byteranges'))
        if not multipart:
            response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    quoted_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    current_headers = response.items()
    for name, value in current_headers:
        if name.startswith(quoted_prefixes):
            del response[name]
            response[quote(name)] = quote(value, safe='/=,:@; ')

    if settings.TEST:
        response['Date'] = format_date_time(time())
822

    
823
def render_fault(request, fault):
    """Render a fault as a plain-text HttpResponse.

    The body is the fault's message, followed by its details when present,
    and the response status is the fault's code. For an InternalServerError,
    when settings.DEBUG or settings.TEST is set, the details are replaced by
    the traceback of the exception currently being handled. The request's
    serialization is forced to 'text' before the response headers are
    updated.
    """

    if isinstance(fault, InternalServerError) and (settings.DEBUG or settings.TEST):
        # format_exc() takes no exception argument: its only parameter is a
        # traceback depth limit, and it always reports the exception that is
        # currently being handled.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
834

    
835
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    The result is always 'text' unless 'format_allowed' is True. In that
    case a 'format' query parameter of 'json' or 'xml' is honoured first,
    then the first recognised media type in the 'Accept' header (any
    parameters after ';' are ignored); 'text' is the fallback.
    """

    if not format_allowed:
        return 'text'

    requested = request.GET.get('format')
    if requested == 'json':
        return 'json'
    if requested == 'xml':
        return 'xml'

    media_types = {
        'application/json': 'json',
        'application/xml': 'xml',
        'text/xml': 'xml',
    }
    accept_header = request.META.get('HTTP_ACCEPT', '')
    for entry in accept_header.split(','):
        media_type = entry.strip().partition(';')[0]
        if media_type in media_types:
            return media_types[media_type]

    return 'text'
858

    
859
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method.

    The wrapped view is only called after the request passes these checks,
    in order: the HTTP method matches 'http_method' (when given), a user is
    present on the request (when 'user_required'), the container and object
    names in the positional arguments are not too long, and the request
    headers are valid. request.serialization and request.backend are set
    before the view runs and the response headers are updated afterwards.

    A Fault is rendered as a response; any other exception is logged and
    rendered as an InternalServerError. The backend, if one was attached to
    the request, is always closed.
    """

    # (position in args, maximum length, error message)
    name_limits = (
        (1, 256, 'Container name too large.'),
        (2, 1024, 'Object name too large.'),
    )

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                for position, limit, message in name_limits:
                    if len(args) > position and len(args[position]) > limit:
                        raise BadRequest(message)

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except BaseException as e:
                logger.exception('Unexpected error: %s' % e)
                return render_fault(request, InternalServerError('Unexpected error'))
            finally:
                backend = getattr(request, 'backend', None)
                if backend is not None:
                    backend.close()
        return wrapper
    return decorator