Statistics
| Branch: | Tag: | Revision:

root / pithos / api / util.py @ 371d907a

History | View | Annotate | Download (34.6 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from pithos.lib.compat import parse_http_date_safe, parse_http_date
52

    
53
from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
54
                                Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
55
                                RangeNotSatisfiable, InternalServerError, NotImplemented)
56
from pithos.api.short_url import encode_url
57
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
58
                                    BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
59
                                    BACKEND_QUEUE_MODULE, BACKEND_QUEUE_CONNECTION,
60
                                    BACKEND_QUOTA, BACKEND_VERSIONING)
61
from pithos.backends import connect_backend
62
from pithos.backends.base import NotAllowedError, QuotaError
63

    
64
import logging
65
import re
66
import hashlib
67
import uuid
68
import decimal
69

    
70

    
71
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
72

    
73

    
74
class UTC(tzinfo):
    """Fixed tzinfo implementation describing UTC (zero offset, no DST)."""

    def dst(self, dt):
        """Return a zero timedelta: no daylight-saving adjustment."""
        return timedelta()

    def tzname(self, dt):
        """Return the zone name, always 'UTC'."""
        return 'UTC'

    def utcoffset(self, dt):
        """Return a zero timedelta: no offset from UTC."""
        return timedelta()
83

    
84
def json_encode_decimal(obj):
    """Return the string form of a decimal.Decimal.

    Raises TypeError for any other kind of object.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
88

    
89
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone.

    The tzinfo of *d* is replaced with UTC; the date and time fields
    themselves are not converted.
    """
    stamped = d.replace(tzinfo=UTC())
    return stamped.isoformat()
93

    
94
def rename_meta_key(d, old, new):
    """Move the value stored under *old* to the key *new*, in place.

    Does nothing when *old* is not in *d*. A value already stored under
    *new* is overwritten.
    """
    if old not in d:
        return
    # Pop before assigning: with old == new an assign-then-delete sequence
    # would drop the entry altogether.
    d[new] = d.pop(old)
99

    
100
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format a truthy 'last_modified' timestamp as an ISO8601 UTC string.

    A new dictionary is returned; *d* itself is not modified, so the
    function can safely be applied to the same dictionary more than once.
    """

    printable = dict((k.lower().replace('-', '_'), v) for k, v in d.items())
    stamp = d.get('last_modified')
    if stamp:
        # isoformat() labels the value as UTC, so the timestamp must be
        # broken down in UTC too; fromtimestamp() would use local time.
        printable['last_modified'] = isoformat(datetime.utcfromtimestamp(stamp))
    return printable
110

    
111
def format_header_key(k):
    """Turn underscores into dashes and capitalize each dash-separated word."""
    words = k.replace('_', '-').split('-')
    return '-'.join(word.capitalize() for word in words)
114

    
115
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict.

    Entries of request.META are matched against 'HTTP_' plus the upper-cased,
    underscore-separated prefix. The bare prefix itself is skipped. Keys are
    stripped of 'HTTP_' and reformatted with format_header_key().
    """

    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for name, value in request.META.iteritems():
        if not name.startswith(meta_prefix) or len(name) <= len(meta_prefix):
            continue
        # TODO: Document or remove '~' replacing.
        headers[format_header_key(name[5:])] = value.replace('~', '')
    return headers
121

    
122
def get_account_headers(request):
    """Extract account metadata and group definitions from the request.

    Returns a (meta, groups) tuple. 'meta' holds the X-Account-Meta-*
    headers; 'groups' maps each lower-cased group name to its list of
    non-empty, comma-separated members (spaces removed).

    Raises BadRequest if a group name contains a dash or an underscore.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    groups = {}
    for header, value in get_header_prefix(request, 'X-Account-Group-').iteritems():
        # Drop the 16-character 'X-Account-Group-' prefix.
        name = header[16:].lower()
        if '-' in name or '_' in name:
            raise BadRequest('Bad characters in group name')
        groups[name] = [member for member in value.replace(' ', '').split(',') if member != '']
    return meta, groups
133

    
134
def put_account_headers(response, meta, groups, policy):
    """Copy account information into the response headers.

    Sets the container count and bytes used (when present in 'meta'),
    Last-Modified, every X-Account-Meta-* entry, the optional until
    timestamp, one X-Account-Group-* header per group and one
    X-Account-Policy-* header per policy entry.
    """
    for key, header in (('count', 'X-Account-Container-Count'),
                        ('bytes', 'X-Account-Bytes-Used')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))
    for name in meta.keys():
        if name.startswith('X-Account-Meta-'):
            response[smart_str(name, strings_only=True)] = smart_str(meta[name], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for group, members in groups.iteritems():
        header = format_header_key('X-Account-Group-' + smart_str(group, strings_only=True))
        response[header] = smart_str(','.join(members), strings_only=True)
    for name, value in policy.iteritems():
        header = smart_str(format_header_key('X-Account-Policy-' + name), strings_only=True)
        response[header] = smart_str(value, strings_only=True)
151

    
152
def get_container_headers(request):
    """Extract container metadata and policy from the request.

    Returns a (meta, policy) tuple. 'meta' holds the X-Container-Meta-*
    headers; 'policy' maps each lower-cased policy name to its value with
    spaces removed.
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    policy = {}
    for header, value in get_header_prefix(request, 'X-Container-Policy-').iteritems():
        # Drop the 19-character 'X-Container-Policy-' prefix.
        policy[header[19:].lower()] = value.replace(' ', '')
    return meta, policy
156

    
157
def put_container_headers(request, response, meta, policy):
    """Copy container information into the response headers.

    Sets the object count and bytes used (when present in 'meta'),
    Last-Modified, every X-Container-Meta-* entry, the list of object meta
    keys in use, the backend's block size and hash algorithm, the optional
    until timestamp and one X-Container-Policy-* header per policy entry.
    """
    for key, header in (('count', 'X-Container-Object-Count'),
                        ('bytes', 'X-Container-Bytes-Used')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))
    for name in meta.keys():
        if name.startswith('X-Container-Meta-'):
            response[smart_str(name, strings_only=True)] = smart_str(meta[name], strings_only=True)
    # Report object meta keys without their 14-character 'X-Object-Meta-' prefix.
    object_keys = [smart_str(key, strings_only=True)[14:]
                   for key in meta['object_meta'] if key.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(object_keys)
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for name, value in policy.iteritems():
        header = smart_str(format_header_key('X-Container-Policy-' + name), strings_only=True)
        response[header] = smart_str(value, strings_only=True)
173

    
174
def get_object_headers(request):
    """Extract object-related information from the request headers.

    Returns a (content_type, meta, sharing, public) tuple. 'meta' holds the
    X-Object-Meta-* headers plus any non-empty Content-Encoding,
    Content-Disposition and X-Object-Manifest values; 'sharing' and 'public'
    come from get_sharing() and get_public().
    """
    environ = request.META
    content_type = environ.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    extra_headers = (
        ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
        ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'),
    )
    for source, target in extra_headers:
        value = environ.get(source)
        if value:
            meta[target] = value
    return content_type, meta, get_sharing(request), get_public(request)
184

    
185
def put_object_headers(response, meta, restricted=False):
    """Copy object information into the response headers.

    ETag, Content-Length, Content-Type and Last-Modified are always set.
    When 'restricted' is true, only Content-Encoding and
    Content-Disposition are added on top of those (if present in 'meta').
    Otherwise the hash, UUID, modifier, version details, X-Object-Meta-*
    entries, manifest, sharing and public headers are emitted as well.
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))

    optional = ('Content-Encoding', 'Content-Disposition')
    if not restricted:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        for name in meta.keys():
            if name.startswith('X-Object-Meta-'):
                response[smart_str(name, strings_only=True)] = smart_str(meta[name], strings_only=True)
        optional += ('X-Object-Manifest', 'X-Object-Sharing', 'X-Object-Shared-By',
                     'X-Object-Allowed-To', 'X-Object-Public')

    for name in optional:
        if name in meta:
            response[name] = smart_str(meta[name], strings_only=True)
207

    
208
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The manifest value is split into a container and a name prefix. The
    backend is asked for the objects under that prefix; 'bytes' becomes the
    sum of their sizes and 'checksum' the MD5 of their concatenated
    checksums. If anything fails while collecting that information, 'meta'
    is left untouched.
    """

    if 'X-Object-Manifest' not in meta:
        return

    checksums = []
    total_bytes = 0
    try:
        src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(request.user_uniq, v_account,
                            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(request.user_uniq,
                                    v_account, src_container, x[0], 'pithos', x[1])
            checksums.append(src_meta['checksum'])
            total_bytes += src_meta['bytes']
    except Exception:
        # Deliberately best-effort: ignore errors, but let KeyboardInterrupt
        # and SystemExit propagate (a bare 'except:' would swallow them).
        return
    meta['bytes'] = total_bytes
    md5 = hashlib.md5()
    md5.update(''.join(checksums))
    meta['checksum'] = md5.hexdigest().lower()
230

    
231
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add sharing information to 'meta', in place.

    'permissions' is either None or an (allowed, perm_path, perms) tuple.
    Nothing happens when it is None or when 'perms' is empty. Otherwise
    X-Object-Sharing lists the read and write users, X-Object-Shared-By is
    set when the permissions come from a different path than the object
    itself, and X-Object-Allowed-To is set when the requesting user is not
    the account owner.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) < 1:
        return

    clauses = []
    for access in ('read', 'write'):
        users = ','.join(perms.get(access, []))
        if users:
            clauses.append(access + '=' + users)
    meta['X-Object-Sharing'] = '; '.join(clauses)

    own_path = '/'.join((v_account, v_container, v_object))
    if own_path != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
249

    
250
def update_public_meta(public, meta):
    """Store the object's public path in 'meta' when 'public' is truthy.

    The path is '/public/' followed by encode_url(public).
    """
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
254

    
255
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.

    Raises NotModified when If-Modified-Since parses to a date that is not
    older than the resource, and PreconditionFailed when If-Unmodified-Since
    parses to a date older than the resource. Headers for which
    parse_http_date_safe() returns None are ignored.
    """

    if 'modified' not in meta:
        return # TODO: Always return?

    header = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if header is not None:
        since = parse_http_date_safe(header)
        if since is not None and int(meta['modified']) <= since:
            raise NotModified('Resource has not been modified')

    header = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if header is not None:
        since = parse_http_date_safe(header)
        if since is not None and int(meta['modified']) > since:
            raise PreconditionFailed('Resource has been modified')
272

    
273
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    An empty checksum is treated as "resource does not exist".

    Raises PreconditionFailed when If-Match is present and the resource does
    not exist or its ETag is not listed. When If-None-Match matches an
    existing resource, raises NotModified for HEAD/GET requests and
    PreconditionFailed for any other method.
    """

    etag = meta['checksum'] or None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            accepted = [x.lower() for x in parse_etags(if_match)]
            if etag not in accepted:
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    # TODO: If this passes, must ignore If-Modified-Since header.
    if if_none_match is None or etag is None:
        return
    if if_none_match != '*' and etag not in [x.lower() for x in parse_etags(if_none_match)]:
        return
    # TODO: Continue if an If-Modified-Since header is present.
    if request.method in ('HEAD', 'GET'):
        raise NotModified('Resource ETag matches')
    raise PreconditionFailed('Resource exists or ETag matches')
296

    
297
def split_container_object_string(s):
    """Split '/container/object' into a (container, object) tuple.

    The object part may itself contain slashes. Raises ValueError when the
    string does not start with '/', has no second '/', or has nothing after
    the second '/'.
    """
    if s[:1] != '/':
        raise ValueError
    container, separator, name = s[1:].partition('/')
    if not separator or not name:
        raise ValueError
    return container, name
305

    
306
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object.

    Content type, metadata, permissions and the public flag are taken from
    the request headers. The X-Source-Version header is passed on for copies
    only. Returns the version id reported by the backend.

    Backend errors are translated: NotAllowedError to Forbidden,
    NameError/IndexError to ItemNotFound, ValueError to BadRequest and
    QuotaError to RequestEntityTooLarge.
    """

    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del(request.META['CONTENT_TYPE'])
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')

    # Arguments shared by the backend's move_object() and copy_object().
    args = [request.user_uniq, src_account, src_container, src_name,
            dest_account, dest_container, dest_name,
            content_type, 'pithos', meta, False, permissions]
    try:
        if move:
            version_id = request.backend.move_object(*args)
        else:
            version_id = request.backend.copy_object(*(args + [src_version]))
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')

    if public is None:
        return version_id
    try:
        request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except NameError:
        raise ItemNotFound('Object does not exist')
    return version_id
338

    
339
def get_int_parameter(p):
    """Convert a parameter to a non-negative integer.

    Returns None when 'p' is None, is not a valid integer literal, or is
    negative.
    """
    if p is None:
        return p
    try:
        value = int(p)
    except ValueError:
        return None
    return value if value >= 0 else None
348

    
349
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises LengthRequired when the header is missing or invalid.
    """
    length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if length is not None:
        return length
    raise LengthRequired('Missing or invalid Content-Length header')
354

    
355
# A single byte-range-spec: 'first-last', 'first-' or '-suffix_length'.
_BYTE_RANGE_SPEC = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')


def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    A suffix range longer than the object selects the whole object, and a
    last byte position beyond the end of the object is clamped to the last
    byte. Ranges that start at or after 'size' are returned as they are, so
    the caller can reject them.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    ret = []
    for spec in ranges[6:].split(','):
        m = _BYTE_RANGE_SPEC.match(spec.strip())
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset == '':
            # Suffix form: the last 'upto' bytes, at most the whole object.
            length = min(int(upto), size)
            ret.append((size - length, length))
            continue

        offset = int(offset)
        if upto == '':
            # Open-ended form: from 'offset' to the end of the object.
            ret.append((offset, size - offset))
            continue

        upto = int(upto)
        if offset > upto:
            return None
        if offset < size:
            # The range starts inside the object; cut it at the last byte.
            upto = min(upto, size - 1)
        ret.append((offset, upto - offset + 1))

    return ret
391

    
392
# 'bytes first-[last]/total', where total may be '*'.
_CONTENT_RANGE_SPEC = re.compile(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')


def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None

    m = _CONTENT_RANGE_SPEC.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None

    offset = int(m.group('offset'))
    upto = int(m.group('upto')) if m.group('upto') != '' else None
    total = int(m.group('total')) if m.group('total') != '*' else None

    # Ignore ranges that are reversed or reach beyond the declared total.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total or (upto is not None and upto >= total):
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
431

    
432
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Returns None when the header is absent, an empty dict when it is blank,
    or a dict with 'read' and/or 'write' lists of lower-cased users. A '*'
    entry replaces the whole list. Users present in both lists are kept only
    in the write list.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '').replace(' ', '')

    ret = {}
    if permissions == '':
        return ret

    def parse_users(text):
        # Unique, lower-cased, non-empty users; '*' overrides everything.
        users = list(set([v.replace(' ','').lower() for v in text.split(',')]))
        if '' in users:
            users.remove('')
        if '*' in users:
            users = ['*']
        if len(users) == 0:
            raise BadRequest('Bad X-Object-Sharing header value')
        return users

    for perm in permissions.split(';'):
        if perm.startswith('read='):
            ret['read'] = parse_users(perm[5:])
        elif perm.startswith('write='):
            ret['write'] = parse_users(perm[6:])
        else:
            raise BadRequest('Bad X-Object-Sharing header value')

    # Keep duplicates only in write list.
    writers = ret.get('write', [])
    dups = [x for x in ret.get('read', []) if x in writers and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret
478

    
479
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None when the header is absent, True for 'true', and False for
    'false' or an empty value (case-insensitive, spaces ignored).

    Raises BadRequest on error.
    """

    value = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if value is None:
        return None

    value = value.replace(' ', '').lower()
    if value in ('false', ''):
        return False
    if value == 'true':
        return True
    raise BadRequest('Bad X-Object-Public header value')
495

    
496
def raw_input_socket(request):
    """Return the socket for reading the rest of the request.

    Uses request._req when SERVER_SOFTWARE starts with 'mod_python',
    otherwise the 'wsgi.input' entry of request.environ.

    Raises NotImplemented if neither is available.
    """

    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    if 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise NotImplemented('Unknown server software')


MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB


def _read_chunk_size(sock):
    """Read one chunk-size line from 'sock' and return the size as an int."""

    if hasattr(sock, 'readline'):
        line = sock.readline()
    else:
        line = b''
        while line[-1:] != b'\n':
            byte = sock.read(1)
            if not byte:
                # End of input before the end of the line; stop spinning.
                break
            line += byte
    # Drop any chunk extension and the surrounding whitespace/CRLF.
    line = line.split(b';', 1)[0].strip()
    try:
        size = int(line, 16)
    except Exception:
        raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
    if size < 0:
        raise BadRequest('Bad chunk size')
    return size


def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Raises BadRequest when the limit is reached, when the input ends before
    the announced data has been read, or when a chunk size cannot be parsed.
    """

    sock = raw_input_socket(request)

    if length >= 0:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
        return

    # Chunked transfers. 'length' carries no size information here, so the
    # bytes actually read are counted separately to enforce the limit.
    received = 0

    # Small version (server does the dechunking).
    server_software = request.META.get('SERVER_SOFTWARE') or ''
    if request.environ.get('mod_wsgi.input_chunked', None) or server_software.startswith('gunicorn'):
        while received < MAX_UPLOAD_SIZE:
            data = sock.read(blocksize)
            if not data:
                return
            received += len(data)
            yield data
        raise BadRequest('Maximum size is reached')

    # Long version (do the dechunking).
    data = b''
    while received < MAX_UPLOAD_SIZE:
        chunk_length = _read_chunk_size(sock)
        # Check if done.
        if chunk_length == 0:
            if len(data) > 0:
                yield data
            return
        # Get the actual data.
        while chunk_length > 0:
            chunk = sock.read(min(chunk_length, blocksize))
            if not chunk:
                # Input ended in the middle of a chunk.
                raise BadRequest()
            chunk_length -= len(chunk)
            received += len(chunk)
            data += chunk
            if len(data) >= blocksize:
                ret = data[:blocksize]
                data = data[blocksize:]
                yield ret
        sock.read(2) # CRLF
    raise BadRequest('Maximum size is reached')
571

    
572
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Incoming data is buffered and written to the backend in blocks; the
    resulting block hashes are collected in the uploaded file's 'hashmap'
    and an MD5 of the stored data is exposed as its 'etag'.
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store buffered data in the backend, 'length' bytes per block.

        Keeps going while at least 'length' bytes are buffered, so a large
        incoming chunk is split into several blocks rather than leaving an
        oversized remainder behind.
        """
        while length > 0 and len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the buffer, the digest and the file object for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer 'raw_data' and flush every complete block to the backend."""
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.backend.block_size)
        return None

    def file_complete(self, file_size):
        """Flush the remaining data as a final block and return the file."""
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
605

    
606
class ObjectWrapper(object):
607
    """Return the object's data block-per-block in each iteration.
608
    
609
    Read from the object using the offset and length provided in each entry of the range list.
610
    """
611
    
612
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
613
        self.backend = backend
614
        self.ranges = ranges
615
        self.sizes = sizes
616
        self.hashmaps = hashmaps
617
        self.boundary = boundary
618
        self.size = sum(self.sizes)
619
        
620
        self.file_index = 0
621
        self.block_index = 0
622
        self.block_hash = -1
623
        self.block = ''
624
        
625
        self.range_index = -1
626
        self.offset, self.length = self.ranges[0]
627
    
628
    def __iter__(self):
629
        return self
630
    
631
    def part_iterator(self):
632
        if self.length > 0:
633
            # Get the file for the current offset.
634
            file_size = self.sizes[self.file_index]
635
            while self.offset >= file_size:
636
                self.offset -= file_size
637
                self.file_index += 1
638
                file_size = self.sizes[self.file_index]
639
            
640
            # Get the block for the current position.
641
            self.block_index = int(self.offset / self.backend.block_size)
642
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
643
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
644
                try:
645
                    self.block = self.backend.get_block(self.block_hash)
646
                except NameError:
647
                    raise ItemNotFound('Block does not exist')
648
            
649
            # Get the data from the block.
650
            bo = self.offset % self.backend.block_size
651
            bl = min(self.length, len(self.block) - bo)
652
            data = self.block[bo:bo + bl]
653
            self.offset += bl
654
            self.length -= bl
655
            return data
656
        else:
657
            raise StopIteration
658
    
659
    def next(self):
660
        if len(self.ranges) == 1:
661
            return self.part_iterator()
662
        if self.range_index == len(self.ranges):
663
            raise StopIteration
664
        try:
665
            if self.range_index == -1:
666
                raise StopIteration
667
            return self.part_iterator()
668
        except StopIteration:
669
            self.range_index += 1
670
            out = []
671
            if self.range_index < len(self.ranges):
672
                # Part header.
673
                self.offset, self.length = self.ranges[self.range_index]
674
                self.file_index = 0
675
                if self.range_index > 0:
676
                    out.append('')
677
                out.append('--' + self.boundary)
678
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
679
                out.append('Content-Transfer-Encoding: binary')
680
                out.append('')
681
                out.append('')
682
                return '\r\n'.join(out)
683
            else:
684
                # Footer.
685
                out.append('')
686
                out.append('--' + self.boundary + '--')
687
                out.append('')
688
                return '\r\n'.join(out)
689

    
690
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data.

    Without a usable Range header the whole object is returned with status
    200. With one, the requested ranges are returned with status 206, as a
    multipart/byteranges body when there is more than one range. An If-Range
    header that matches neither the modification time nor the checksum makes
    the Range header be ignored.

    Raises RangeNotSatisfiable when a requested range exceeds the object.
    """

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is not None:
        # Evaluate If-Range first: when it does not match, the Range header
        # is ignored altogether, including any unsatisfiable range in it.
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # Modification time has passed instead.
                validator_date = parse_http_date(if_range)
            except ValueError:
                matches = if_range == meta['checksum']
            else:
                # Compare at whole-second resolution, as everywhere else
                # the modification time is turned into an HTTP date.
                matches = validator_date == int(meta['modified'])
            if not matches:
                ranges = None

    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        check = [True for offset, length in ranges if
                    length <= 0 or length > size or
                    offset < 0 or offset >= size or
                    offset + length > size]
        if len(check) > 0:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206

    if ret == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    else:
        boundary = ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response
736

    
737
def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset.

    Only the part of ``data`` that fits in the block containing ``offset``
    is written. If that block already exists in ``hashmap`` it is updated
    in place through ``request.backend.update_block``; otherwise a new
    block, zero-filled up to the in-block offset, is stored with
    ``request.backend.put_block`` and its hash appended to ``hashmap``.

    Returns the number of bytes of ``data`` that were written.
    """

    block_size = request.backend.block_size
    # divmod keeps the block index an exact integer regardless of the
    # division semantics in effect (no round trip through float).
    block_index, block_offset = divmod(offset, block_size)
    length = min(len(data), block_size - block_offset)
    if block_index < len(hashmap):
        hashmap[block_index] = request.backend.update_block(
            hashmap[block_index], data[:length], block_offset)
    else:
        # NOTE(review): an index past len(hashmap) is still appended at the
        # end; presumably callers only ever write contiguously -- confirm.
        hashmap.append(request.backend.put_block(
            (b'\x00' * block_offset) + data[:length]))
    return length  # Return amount of data written.
748

    
749
def hashmap_md5(request, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    Each block is fetched with ``request.backend.get_block`` and, when it
    comes back shorter than the length it should have, padded with zero
    bytes before being fed to the digest. Every block but the last is
    expected to be ``request.backend.block_size`` bytes long; the last one
    holds the remaining ``size % block_size`` bytes, or a full block when
    ``size`` is an exact non-zero multiple of the block size.

    Returns the lowercase hexadecimal digest.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    block_size = request.backend.block_size
    last_index = len(hashmap) - 1
    for index, block_hash in enumerate(hashmap):
        data = request.backend.get_block(block_hash)
        expected = block_size
        if index == last_index:
            remainder = size % block_size
            # A zero remainder on a non-empty object means the final block
            # is full, not empty, so it must still be padded to block_size.
            if remainder or not size:
                expected = remainder
        pad = expected - min(len(data), expected)
        md5.update(data + (b'\x00' * pad))
    return md5.hexdigest().lower()
762

    
763
def simple_list_response(request, l):
    """Serialize the list ``l`` in the format of ``request.serialization``.

    'text' yields one item per line (with a trailing newline), 'xml'
    renders the 'items.xml' template and 'json' dumps the list as JSON.
    Any other serialization value yields None.
    """
    # Renderers are lazy so that only the selected one is evaluated.
    renderers = {
        'text': lambda: '\n'.join(l) + '\n',
        'xml': lambda: render_to_string('items.xml', {'items': l}),
        'json': lambda: json.dumps(l),
    }
    render = renderers.get(request.serialization)
    if render is None:
        return None
    return render()
770

    
771
def get_backend():
    """Connect to the backend described by the BACKEND_* module constants.

    The default 'quota' and 'versioning' policy entries of the returned
    backend are overridden with BACKEND_QUOTA and BACKEND_VERSIONING.
    """
    options = {
        'db_module': BACKEND_DB_MODULE,
        'db_connection': BACKEND_DB_CONNECTION,
        'block_module': BACKEND_BLOCK_MODULE,
        'block_path': BACKEND_BLOCK_PATH,
        'queue_module': BACKEND_QUEUE_MODULE,
        'queue_connection': BACKEND_QUEUE_CONNECTION,
    }
    backend = connect_backend(**options)
    overrides = (('quota', BACKEND_QUOTA), ('versioning', BACKEND_VERSIONING))
    for key, value in overrides:
        backend.default_policy[key] = value
    return backend
781

    
782
def update_request_headers(request):
    """Validate the HTTP_* entries of ``request.META`` and URL-decode them.

    Raises BadRequest when there are more than 90 such headers, when a
    name is longer than 128 or a value longer than 256 characters, or when
    a name or value is not pure ASCII. Entries containing '%' are replaced
    in ``request.META`` by their unquoted name and value.
    """
    # Work on a snapshot, since request.META is modified while looping.
    headers = dict((name, value)
                   for name, value in request.META.iteritems()
                   if name.startswith('HTTP_'))
    if len(headers) > 90:
        raise BadRequest('Too many headers.')
    for name, value in headers.iteritems():
        if len(name) > 128:
            raise BadRequest('Header name too large.')
        if len(value) > 256:
            raise BadRequest('Header value too large.')
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' not in name and '%' not in value:
            continue
        # Handle URL-encoded keys and values.
        del request.META[name]
        request.META[unquote(name)] = smart_unicode(unquote(value), strings_only=True)
801

    
802
def update_response_headers(request, response):
    """Finalize the Content-Type, Content-Length and encoding of headers.

    The content type follows ``request.serialization`` for 'xml' and
    'json'; otherwise 'text/plain' is used only when the response has an
    empty Content-Type. A missing Content-Length is filled in from the
    response content, except for 'multipart/byteranges' responses. Finally
    the X-Account-*, X-Container-*, X-Object-* and Content-* headers are
    re-set with URL-quoted names and values.
    """
    content_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    if request.serialization in content_types:
        response['Content-Type'] = content_types[request.serialization]
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if not response.has_header('Content-Length'):
        multipart = (response.has_header('Content-Type') and
                     response['Content-Type'].startswith('multipart/byteranges'))
        if not multipart:
            response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    quoted_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    # Iterate over a snapshot, since headers are removed and re-added.
    for name, value in list(response.items()):
        if name.startswith(quoted_prefixes):
            del response[name]
            response[quote(name)] = quote(value, safe='/=,:@; ')
822

    
823
def render_fault(request, fault):
    """Build the plain-text HttpResponse that reports ``fault``.

    The body is ``fault.message`` followed, when present, by a blank line
    and ``fault.details``; the status is ``fault.code``. For an
    InternalServerError with ``settings.DEBUG`` on, the details are
    replaced by the traceback of the exception currently being handled.
    ``request.serialization`` is forced to 'text' before the response
    headers are finalized.
    """
    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # format_exc() takes an optional frame limit, not an exception;
        # it always formats the exception currently being handled.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
834

    
835
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.

    The 'format' query parameter is consulted first, then the entries of
    the Accept header in the order given; 'text' is the fallback.
    """

    if not format_allowed:
        return 'text'

    by_parameter = {'json': 'json', 'xml': 'xml'}
    by_media_type = {
        'application/json': 'json',
        'application/xml': 'xml',
        'text/xml': 'xml',
    }

    requested = request.GET.get('format')
    if requested in by_parameter:
        return by_parameter[requested]

    accept_header = request.META.get('HTTP_ACCEPT', '')
    for entry in accept_header.split(','):
        # Drop any parameters (e.g. ";q=0.9") after the media type.
        media_type = entry.strip().partition(';')[0]
        if media_type in by_media_type:
            return by_media_type[media_type]

    return 'text'
858

    
859
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method.

    The wrapped view is only called after the request passed the method,
    user, name length and header checks; ``request.serialization`` and
    ``request.backend`` are set before the call. Any Fault raised on the
    way is rendered as the response, any other exception is logged and
    rendered as an InternalServerError. The backend, if one was attached
    to the request, is closed in all cases.
    """

    # (index in args, maximum length, error message) per name to check.
    name_limits = (
        (1, 256, 'Container name too large.'),
        (2, 1024, 'Object name too large.'),
    )

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                for position, limit, message in name_limits:
                    if len(args) > position and len(args[position]) > limit:
                        raise BadRequest(message)

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except BaseException as e:
                # NOTE(review): BaseException also covers SystemExit and
                # KeyboardInterrupt -- confirm that this is intended.
                logger.exception('Unexpected error: %s' % e)
                return render_fault(request, InternalServerError('Unexpected error'))
            finally:
                backend = getattr(request, 'backend', None)
                if backend is not None:
                    backend.close()
        return wrapper
    return decorator