Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 4a669c71

History | View | Annotate | Download (34.8 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from pithos.lib.compat import parse_http_date_safe, parse_http_date
52

    
53
from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
54
                                Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
55
                                RangeNotSatisfiable, InternalServerError, NotImplemented)
56
from pithos.api.short_url import encode_url
57
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
58
                                    BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
59
                                    BACKEND_QUEUE_MODULE, BACKEND_QUEUE_CONNECTION,
60
                                    BACKEND_QUOTA, BACKEND_VERSIONING)
61
from pithos.backends import connect_backend
62
from pithos.backends.base import NotAllowedError, QuotaError
63

    
64
import logging
65
import re
66
import hashlib
67
import uuid
68
import decimal
69

    
70

    
71
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
72

    
73

    
74
class UTC(tzinfo):
    """A tzinfo with a fixed zero offset, no DST, and the name 'UTC'."""

    def tzname(self, dt):
        """Return the zone name, regardless of dt."""
        return 'UTC'

    def utcoffset(self, dt):
        """Return a zero offset from UTC, regardless of dt."""
        return timedelta()

    def dst(self, dt):
        """Return a zero daylight-saving adjustment, regardless of dt."""
        return timedelta()
83

    
84
def json_encode_decimal(obj):
    """Return the string form of a decimal.Decimal.

    Raises TypeError for any object that is not a Decimal.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
88

    
89
def isoformat(d):
    """Return the ISO8601 string of d with a UTC timezone attached.

    The tzinfo of d is replaced by UTC; the date and time fields themselves
    are not converted.
    """
    stamped = d.replace(tzinfo=UTC())
    return stamped.isoformat()
93

    
94
def rename_meta_key(d, old, new):
    """Move the value stored under key old to key new, in place.

    Does nothing when old is absent. The value is copied before the old key
    is deleted, so passing the same key for old and new removes the entry.
    """
    if old in d:
        d[new] = d[old]
        del d[old]
99

    
100
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format a truthy 'last_modified' timestamp as an ISO8601 string.

    Note that the 'last_modified' entry of the dictionary passed in is
    overwritten with the formatted value; the returned dictionary is new.
    """

    if d.get('last_modified'):
        # isoformat() labels the datetime it is given as UTC without
        # converting it, so the datetime must be built in UTC rather than
        # in the server's local time.
        d['last_modified'] = isoformat(datetime.utcfromtimestamp(d['last_modified']))
    return dict((k.lower().replace('-', '_'), v) for k, v in d.items())
110

    
111
def format_header_key(k):
    """Turn underscores into dashes and capitalize every dash-separated part."""
    parts = k.replace('_', '-').split('-')
    return '-'.join(part.capitalize() for part in parts)
114

    
115
def get_header_prefix(request, prefix):
    """Collect the request headers whose name starts with prefix.

    Headers are looked up in request.META under their 'HTTP_' names. A header
    whose name is exactly the prefix is skipped. Returned keys are
    reformatted with format_header_key().
    """

    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    found = {}
    for key, value in request.META.items():
        if not key.startswith(meta_prefix) or len(key) <= len(meta_prefix):
            continue
        # key[5:] drops the leading 'HTTP_'.
        # TODO: Document or remove '~' replacing.
        found[format_header_key(key[5:])] = value.replace('~', '')
    return found
121

    
122
def check_meta_headers(meta):
    """Validate the count and sizes of a dictionary of meta headers.

    Raises BadRequest when there are more than 90 entries, when a name is
    longer than 128 characters or when a value is longer than 256.
    """
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for name in meta:
        if len(name) > 128:
            raise BadRequest('Header name too large.')
        if len(meta[name]) > 256:
            raise BadRequest('Header value too large.')
130

    
131
def get_account_headers(request):
    """Read the account meta and group headers from the request.

    Returns a (meta, groups) tuple. groups maps each lowercased group name
    to the list of its non-empty, comma-separated members. Raises BadRequest
    for invalid meta headers or for a group name containing '-' or '_'.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)
    group_headers = get_header_prefix(request, 'X-Account-Group-')
    groups = {}
    for header in group_headers:
        # len('X-Account-Group-') == 16
        name = header[16:].lower()
        if '-' in name or '_' in name:
            raise BadRequest('Bad characters in group name')
        members = group_headers[header].replace(' ', '').split(',')
        groups[name] = [member for member in members if member != '']
    return meta, groups
143

    
144
def put_account_headers(response, meta, groups, policy):
    """Set the account headers on the response.

    meta must contain 'modified'; 'count', 'bytes' and 'until_timestamp' are
    emitted only when present. Every 'X-Account-Meta-*' entry of meta, every
    group and every policy entry gets a header of its own.
    """
    for key, header in (('count', 'X-Account-Container-Count'),
                        ('bytes', 'X-Account-Bytes-Used')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))
    for name in list(meta.keys()):
        if name.startswith('X-Account-Meta-'):
            response[smart_str(name, strings_only=True)] = smart_str(meta[name], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for group in groups:
        header = format_header_key('X-Account-Group-' + smart_str(group, strings_only=True))
        response[header] = smart_str(','.join(groups[group]), strings_only=True)
    for name in policy:
        header = smart_str(format_header_key('X-Account-Policy-' + name), strings_only=True)
        response[header] = smart_str(policy[name], strings_only=True)
161

    
162
def get_container_headers(request):
    """Read the container meta and policy headers from the request.

    Returns a (meta, policy) tuple. policy maps the lowercased policy name
    to its value with all spaces removed.
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)
    policy = {}
    policy_headers = get_header_prefix(request, 'X-Container-Policy-')
    for header in policy_headers:
        # len('X-Container-Policy-') == 19
        policy[header[19:].lower()] = policy_headers[header].replace(' ', '')
    return meta, policy
167

    
168
def put_container_headers(request, response, meta, policy):
    """Set the container headers on the response.

    meta must contain 'modified' and 'object_meta'; 'count', 'bytes' and
    'until_timestamp' are emitted only when present. Block size and hash
    algorithm are read from request.backend.
    """
    for key, header in (('count', 'X-Container-Object-Count'),
                        ('bytes', 'X-Container-Bytes-Used')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))
    for name in list(meta.keys()):
        if name.startswith('X-Container-Meta-'):
            response[smart_str(name, strings_only=True)] = smart_str(meta[name], strings_only=True)
    # List the object meta keys without their 'X-Object-Meta-' prefix
    # (14 characters).
    object_keys = [smart_str(x, strings_only=True)[14:]
                   for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(object_keys)
    backend = request.backend
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for name in policy:
        header = smart_str(format_header_key('X-Container-Policy-' + name), strings_only=True)
        response[header] = smart_str(policy[name], strings_only=True)
184

    
185
def get_object_headers(request):
    """Read the object related headers from the request.

    Returns a (content_type, meta, sharing, public) tuple. meta holds the
    'X-Object-Meta-*' headers plus any non-empty Content-Encoding,
    Content-Disposition and X-Object-Manifest header.
    """
    content_type = request.META.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)
    extra_headers = (('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
                     ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
                     ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'))
    for environ_key, name in extra_headers:
        value = request.META.get(environ_key)
        if value:
            meta[name] = value
    return content_type, meta, get_sharing(request), get_public(request)
196

    
197
def put_object_headers(response, meta, restricted=False):
    """Set the object headers on the response.

    ETag, Content-Length, Content-Type and Last-Modified are always set.
    When restricted is true, only Content-Encoding and Content-Disposition
    are added to those; otherwise the hash, UUID, version, modifier, all
    'X-Object-Meta-*' entries and the sharing/public headers are set too.
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if restricted:
        optional = ('Content-Encoding', 'Content-Disposition')
    else:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        for name in list(meta.keys()):
            if name.startswith('X-Object-Meta-'):
                response[smart_str(name, strings_only=True)] = smart_str(meta[name], strings_only=True)
        optional = ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
                    'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                    'X-Object-Public')
    # Headers that are copied over only when meta provides them.
    for name in optional:
        if name in meta:
            response[name] = smart_str(meta[name], strings_only=True)
219

    
220
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The manifest value is split into a container and a name prefix. The
    'bytes' of meta becomes the sum of the sizes of all objects listed under
    that prefix, and 'checksum' the MD5 of their concatenated checksums.
    This is best effort: on any error meta is left untouched.
    """

    if 'X-Object-Manifest' not in meta:
        return
    checksums = []
    total_bytes = 0
    try:
        src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(request.user_uniq, v_account,
                            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(request.user_uniq,
                                    v_account, src_container, x[0], 'pithos', x[1])
            checksums.append(src_meta['checksum'])
            total_bytes += src_meta['bytes']
    except Exception:
        # Ignore errors, but let SystemExit/KeyboardInterrupt propagate.
        return
    meta['bytes'] = total_bytes
    md5 = hashlib.md5()
    md5.update(''.join(checksums))
    meta['checksum'] = md5.hexdigest().lower()
242

    
243
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add the sharing related entries to meta.

    permissions is None or an (allowed, perm_path, perms) tuple. Nothing is
    done when it is None or when perms is empty. 'X-Object-Shared-By' is set
    when perm_path is not the path of this object, and 'X-Object-Allowed-To'
    when the requesting user is not the account owner.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return
    parts = []
    for kind in ('read', 'write'):
        users = ','.join(perms.get(kind, []))
        if users:
            parts.append(kind + '=' + users)
    meta['X-Object-Sharing'] = '; '.join(parts)
    object_path = '/'.join((v_account, v_container, v_object))
    if object_path != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
261

    
262
def update_public_meta(public, meta):
    """Set 'X-Object-Public' in meta to the public URL path, if public is truthy."""
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
266

    
267
def validate_modification_preconditions(request, meta):
    """Check the modification time against the conditional request headers.

    Raises NotModified when If-Modified-Since is not older than the
    'modified' time of meta, and PreconditionFailed when
    If-Unmodified-Since is older than it. Headers that do not parse are
    ignored.
    """

    if 'modified' not in meta:
        return # TODO: Always return?

    modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if modified_since is not None:
        modified_since = parse_http_date_safe(modified_since)
        if modified_since is not None and int(meta['modified']) <= modified_since:
            raise NotModified('Resource has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        unmodified_since = parse_http_date_safe(unmodified_since)
        if unmodified_since is not None and int(meta['modified']) > unmodified_since:
            raise PreconditionFailed('Resource has been modified')
284

    
285
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    The ETag is the 'checksum' of meta; a missing or empty checksum means
    that the resource does not exist. Raises PreconditionFailed when
    If-Match is not satisfied. When If-None-Match matches, raises
    NotModified for HEAD/GET requests and PreconditionFailed otherwise.
    """

    # meta may carry no checksum at all when the resource does not exist.
    etag = meta.get('checksum') or None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            candidates = [x.lower() for x in parse_etags(if_match)]
            if etag not in candidates:
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None and etag is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
            # TODO: Continue if an If-Modified-Since header is present.
            if request.method in ('HEAD', 'GET'):
                raise NotModified('Resource ETag matches')
            raise PreconditionFailed('Resource exists or ETag matches')
308

    
309
def split_container_object_string(s):
    """Split a '/container/object' string into (container, object).

    The object part may itself contain slashes. Raises ValueError when the
    string does not start with '/', has no second '/', or has nothing after
    the second '/'.
    """
    if s[:1] != '/':
        raise ValueError
    container, slash, name = s[1:].partition('/')
    if not slash or name == '':
        raise ValueError
    return container, name
317

    
318
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object and return the version id reported by the backend.

    Content type, meta, permissions and public flag are taken from the
    request headers. The X-Source-Version header is only passed along when
    copying. Backend errors are translated to the matching API faults.
    """

    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del(request.META['CONTENT_TYPE'])
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        args = (request.user_uniq, src_account, src_container, src_name,
                dest_account, dest_container, dest_name,
                content_type, 'pithos', meta, False, permissions)
        if move:
            version_id = request.backend.move_object(*args)
        else:
            version_id = request.backend.copy_object(*(args + (src_version,)))
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')

    if public is None:
        return version_id
    try:
        request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except NameError:
        raise ItemNotFound('Object does not exist')
    return version_id
350

    
351
def get_int_parameter(p):
    """Convert p to a non-negative integer.

    Returns None when p is None, is not a valid integer or is negative.
    """
    if p is None:
        return p
    try:
        value = int(p)
    except ValueError:
        return None
    return value if value >= 0 else None
360

    
361
def get_content_length(request):
    """Return the Content-Length of the request as a non-negative integer.

    Raises LengthRequired when the header is missing or invalid.
    """
    declared = request.META.get('CONTENT_LENGTH')
    content_length = get_int_parameter(declared)
    if content_length is not None:
        return content_length
    raise LengthRequired('Missing or invalid Content-Length header')
366

    
367
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    A range that starts inside the object but ends beyond it, and a suffix
    range longer than the object, are clamped to the data available. A range
    that starts at or after the end of the object is returned as is, for the
    caller to reject.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compiled once, outside the loop over the individual range specs.
    spec = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        m = spec.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                if offset < size:
                    # A last byte position beyond the end means "up to the end".
                    upto = min(upto, size - 1)
                ret.append((offset, upto - offset + 1))
            else:
                ret.append((offset, size - offset))
        else:
            # Suffix range: the last 'upto' bytes, or everything if the
            # object is shorter than that.
            length = min(int(upto), size)
            ret.append((size - length, length))

    return ret
403

    
404
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Returns None when the header is missing, malformed or inconsistent.
    Otherwise returns an (offset, length, total) tuple, where length is None
    for an open ended range and total is None when given as '*'.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None

    m = re.match(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if m is None:
        return (None, None, None) if header == 'bytes */*' else None

    offset = int(m.group('offset'))
    upto = int(m.group('upto')) if m.group('upto') != '' else None
    total = int(m.group('total')) if m.group('total') != '*' else None

    # Reject ranges that end before they start or reach beyond the total.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
443

    
444
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Returns None when the header is missing, otherwise a dictionary that may
    hold a 'read' and a 'write' list of lowercased names. A list containing
    '*' is reduced to ['*']. A name present in both lists is kept in the
    write list only.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '').replace(' ', '')

    ret = {}
    if permissions == '':
        return ret
    for perm in permissions.split(';'):
        for kind in ('read', 'write'):
            marker = kind + '='
            if not perm.startswith(marker):
                continue
            names = list(set([v.replace(' ','').lower() for v in perm[len(marker):].split(',')]))
            if '' in names:
                names.remove('')
            if '*' in names:
                names = ['*']
            if len(names) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
            ret[kind] = names
            break
        else:
            # Neither a 'read=' nor a 'write=' section.
            raise BadRequest('Bad X-Object-Sharing header value')

    # Keep duplicates only in write list.
    writers = ret.get('write', [])
    dups = [x for x in ret.get('read', []) if x in writers and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret
490

    
491
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None when the header is missing, True for 'true' and False for
    'false' or an empty value. Spaces and letter case are ignored.

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    flag = public.replace(' ', '').lower()
    if flag in ('false', ''):
        return False
    if flag == 'true':
        return True
    raise BadRequest('Bad X-Object-Public header value')
507

    
508
def raw_input_socket(request):
    """Return the object to read the rest of the request body from.

    Under mod_python this is request._req, otherwise the 'wsgi.input' entry
    of request.environ. Raises NotImplemented when neither applies.
    """

    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    environ = request.environ
    if 'wsgi.input' not in environ:
        raise NotImplemented('Unknown server software')
    return environ['wsgi.input']
517

    
518
# Upper bound, in bytes, on the amount of data socket_read_iterator() reads.
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB
519

    
520
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Raises BadRequest when the size limit is exceeded, when a chunk size
    cannot be parsed or when the input ends before the expected data has
    been read.
    """

    sock = raw_input_socket(request)
    if length >= 0:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
        return

    # Chunked transfers. 'length' is negative here and tells nothing about
    # the size, so the amount of data received is counted separately.
    received = 0
    server_software = request.META.get('SERVER_SOFTWARE') or ''

    # Small version (server does the dechunking).
    if request.environ.get('mod_wsgi.input_chunked', None) or server_software.startswith('gunicorn'):
        while True:
            data = sock.read(blocksize)
            if not data:
                return
            received += len(data)
            if received > MAX_UPLOAD_SIZE:
                raise BadRequest('Maximum size is reached')
            yield data

    # Long version (do the dechunking).
    data = ''
    while True:
        # Get chunk size.
        if hasattr(sock, 'readline'):
            chunk_length = sock.readline()
        else:
            chunk_length = ''
            while chunk_length[-1:] != '\n':
                byte = sock.read(1)
                if not byte:
                    # End of input; whatever was read is validated below.
                    break
                chunk_length += byte
        # Drop any chunk extension that follows the size.
        pos = chunk_length.find(';')
        if pos >= 0:
            chunk_length = chunk_length[:pos]
        try:
            chunk_length = int(chunk_length.strip(), 16)
        except (TypeError, ValueError):
            raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
        if chunk_length < 0:
            raise BadRequest('Bad chunk size')
        # Check if done.
        if chunk_length == 0:
            if len(data) > 0:
                yield data
            return
        # Enforce the limit on the declared size, before reading the chunk.
        received += chunk_length
        if received > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        # Get the actual data.
        while chunk_length > 0:
            chunk = sock.read(min(chunk_length, blocksize))
            if not chunk:
                # Input ended in the middle of a chunk.
                raise BadRequest()
            chunk_length -= len(chunk)
            data += chunk
            if len(data) >= blocksize:
                ret = data[:blocksize]
                data = data[blocksize:]
                yield ret
        sock.read(2) # CRLF
583

    
584
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Incoming data is buffered and stored in the backend in blocks of
    backend.block_size bytes. The resulting UploadedFile carries the total
    'size', the 'hashmap' (list of values returned by put_block) and the
    'etag' (MD5 hex digest of the data).
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store the buffered data in blocks of 'length' bytes.

        Data is consumed for as long as a whole block is available, so the
        buffer never grows beyond a single block between calls.
        """
        if length <= 0:
            return
        while len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the buffer, digest and UploadedFile for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer raw_data and store every complete block; always returns None."""
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.backend.block_size)
        return None

    def file_complete(self, file_size):
        """Store any remaining data as the last block and return the file."""
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
617

    
618
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.

    The object may consist of several files: 'sizes' holds the size of each
    file and 'hashmaps' the list of block hashes of each file. Offsets in
    'ranges' refer to the concatenation of all the files. When more than one
    range is given, each part is preceded by a multipart header using
    'boundary' and the last part is followed by a closing footer.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of data of the current range.

        Raises StopIteration when the current range has been exhausted.
        """
        if self.length <= 0:
            raise StopIteration

        # Get the file for the current offset.
        file_size = self.sizes[self.file_index]
        while self.offset >= file_size:
            self.offset -= file_size
            self.file_index += 1
            file_size = self.sizes[self.file_index]

        # Get the block for the current position.
        block_size = self.backend.block_size
        hashmap = self.hashmaps[self.file_index]
        self.block_index = self.offset // block_size
        if self.block_hash != hashmap[self.block_index]:
            self.block_hash = hashmap[self.block_index]
            try:
                self.block = self.backend.get_block(self.block_hash)
            except NameError:
                raise ItemNotFound('Block does not exist')

        # Get the data from the block.
        bo = self.offset % block_size
        bs = block_size
        if self.block_index == len(hashmap) - 1:
            # The last block may be partially used. A zero remainder means
            # that it is completely used, not that it is empty.
            bs = file_size % block_size or block_size
        bl = min(self.length, bs - bo)
        data = self.block[bo:bo + bl]
        self.offset += bl
        self.length -= bl
        return data

    def next(self):
        """Return the next piece of the response body."""
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                # Nothing sent yet: go straight to the first part header.
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)

    # Name of the same method under the Python 3 iterator protocol.
    __next__ = next
704

    
705
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data.

    Replies with status 200 and the whole object, or with status 206 and the
    ranges requested through the Range header. An If-Range header that
    matches neither the modification time nor the checksum of the object
    turns a range request into a full reply. The 'public' flag is passed to
    put_object_headers() as its 'restricted' argument.

    Raises RangeNotSatisfiable when a requested range exceeds the object.
    """

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        if any(length <= 0 or length > size or
               offset < 0 or offset >= size or
               offset + length > size
               for offset, length in ranges):
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # Modification time has passed instead.
                last_modified = parse_http_date(if_range)
                # Compare whole seconds, as the Last-Modified header does.
                if last_modified != int(meta['modified']):
                    ranges = [(0, size)]
                    ret = 200
            except ValueError:
                # Not a date, so treat the value as an ETag.
                if if_range != meta['checksum']:
                    ranges = [(0, size)]
                    ret = 200

    if ret == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    else:
        boundary = ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response
751

    
752
def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset.

    At most one block is written: the leading part of 'data' that fits
    between 'offset' and the end of the block containing that offset.

    If the block index is inside 'hashmap', the existing block is updated
    through request.backend.update_block() and its hash replaced in place.
    Otherwise a new block, zero-filled up to the in-block offset, is stored
    through request.backend.put_block() and its hash appended to 'hashmap'.

    Returns the number of items of 'data' that were written.
    """

    backend = request.backend
    # divmod() uses floor division, so the block index stays exact for
    # arbitrarily large offsets (int(offset / size) may pass through a float).
    bi, bo = divmod(offset, backend.block_size)
    bl = min(len(data), backend.block_size - bo)
    chunk = data[:bl]
    if bi < len(hashmap):
        hashmap[bi] = backend.update_block(hashmap[bi], chunk, bo)
    else:
        hashmap.append(backend.put_block(('\x00' * bo) + chunk))
    return bl  # Return amount of data written.
763

    
764
def hashmap_md5(request, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    Every block listed in 'hashmap' is fetched from request.backend and fed
    to the digest in order. Blocks come in padded, so the last one is cut
    down to the bytes that actually belong to an object of 'size' bytes.

    Returns the lowercase hexadecimal digest.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = request.backend.block_size
    last = len(hashmap) - 1
    for bi, block_hash in enumerate(hashmap):
        data = request.backend.get_block(block_hash)  # Blocks come in padded.
        if bi == last:
            tail = size % bs
            # A zero remainder on a non-empty object means the last block is
            # completely full; slicing with [:0] would drop it entirely.
            if tail or not size:
                data = data[:tail]
        md5.update(data)
    return md5.hexdigest().lower()
776

    
777
def simple_list_response(request, l):
    """Serialize the list 'l' according to request.serialization.

    'text' gives one item per line (newline terminated), 'xml' renders the
    'items.xml' template and 'json' dumps the list as JSON. Any other
    serialization value yields None.
    """

    fmt = request.serialization
    if fmt == 'json':
        return json.dumps(l)
    if fmt == 'xml':
        return render_to_string('items.xml', {'items': l})
    if fmt == 'text':
        return '%s\n' % '\n'.join(l)
    return None
784

    
785
def get_backend():
    """Connect to the storage backend using the module-level settings.

    The connection parameters come from the BACKEND_* constants; the
    default 'quota' and 'versioning' policies are then overridden with
    BACKEND_QUOTA and BACKEND_VERSIONING. Returns the backend object.
    """

    connection_options = {
        'db_module': BACKEND_DB_MODULE,
        'db_connection': BACKEND_DB_CONNECTION,
        'block_module': BACKEND_BLOCK_MODULE,
        'block_path': BACKEND_BLOCK_PATH,
        'queue_module': BACKEND_QUEUE_MODULE,
        'queue_connection': BACKEND_QUEUE_CONNECTION,
    }
    backend = connect_backend(**connection_options)

    policy = backend.default_policy
    policy['quota'] = BACKEND_QUOTA
    policy['versioning'] = BACKEND_VERSIONING
    return backend
795

    
796
def update_request_headers(request):
    """Check the HTTP_* entries of request.META and URL-decode them.

    Raises BadRequest if a header name or value is not ASCII. Headers whose
    name or value contains a '%' are removed and stored again under the
    unquoted name with the unquoted value.
    """

    # Handle URL-encoded keys and values.
    # Work on a snapshot, since request.META is modified inside the loop.
    snapshot = dict((name, value)
                    for name, value in request.META.iteritems()
                    if name.startswith('HTTP_'))
    for name, value in snapshot.iteritems():
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' not in name and '%' not in value:
            continue
        del request.META[name]
        request.META[unquote(name)] = smart_unicode(unquote(value), strings_only=True)
808

    
809
def update_response_headers(request, response):
    """Finalize the headers of 'response' before it is returned.

    Sets the Content-Type dictated by request.serialization (falling back
    to plain text when none is set), fills in a missing Content-Length
    unless the reply is 'multipart/byteranges', and URL-encodes the names
    and values of the X-Account-*, X-Container-*, X-Object-* and Content-*
    headers.
    """

    forced_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    forced = forced_types.get(request.serialization)
    if forced is not None:
        response['Content-Type'] = forced
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if not response.has_header('Content-Length'):
        multipart = (response.has_header('Content-Type') and
                     response['Content-Type'].startswith('multipart/byteranges'))
        if not multipart:
            response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    encoded_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    headers = response.items()
    for name, value in headers:
        if not name.startswith(encoded_prefixes):
            continue
        del response[name]
        response[quote(name)] = quote(value, safe='/=,:@; ')
829

    
830
def render_fault(request, fault):
    """Build the plain-text HttpResponse describing 'fault'.

    The body is the fault message, followed by the fault details when there
    are any; the status is fault.code. For an InternalServerError with
    settings.DEBUG enabled, the details are replaced by the traceback of
    the exception currently being handled.

    As a side effect request.serialization is forced to 'text'.
    """

    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # format_exc() takes an optional frame limit, not an exception
        # instance; it always formats the exception being handled.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
841

    
842
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    The result is always 'text' unless 'format_allowed' is True. In that
    case the 'format' query parameter is looked at first ('json' or 'xml'),
    then the entries of the Accept header in order; 'text' is the fallback.
    """

    if not format_allowed:
        return 'text'

    requested = request.GET.get('format')
    for known in ('json', 'xml'):
        if requested == known:
            return known

    by_media_type = {
        'application/json': 'json',
        'application/xml': 'xml',
        'text/xml': 'xml',
    }
    for entry in request.META.get('HTTP_ACCEPT', '').split(','):
        # Drop any parameters (e.g. ';q=0.9') following the media type.
        media_type = entry.strip().split(';', 1)[0]
        if media_type in by_media_type:
            return by_media_type[media_type]

    return 'text'
865

    
866
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method.

    The wrapped view is only called after the request method, the user and
    the container/object name lengths have been checked, the request
    headers validated, and request.serialization / request.backend filled
    in. Any Fault raised is rendered as the response; any other exception
    is logged and rendered as an InternalServerError. The backend, if one
    was attached to the request, is always closed afterwards.
    """

    # (position in the view's positional args, maximum length, error message)
    name_limits = ((1, 256, 'Container name too large.'),
                   (2, 1024, 'Object name too large.'))

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                for position, limit, message in name_limits:
                    if len(args) > position and len(args[position]) > limit:
                        raise BadRequest(message)

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except BaseException as error:
                logger.exception('Unexpected error: %s' % error)
                return render_fault(request, InternalServerError('Unexpected error'))
            finally:
                backend = getattr(request, 'backend', None)
                if backend is not None:
                    backend.close()
        return wrapper
    return decorator