Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 45cf0bc8

History | View | Annotate | Download (34.9 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52

    
53
from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
54
                                Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
55
                                RangeNotSatisfiable, InternalServerError, NotImplemented)
56
from pithos.api.short_url import encode_url
57
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
58
                                    BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
59
                                    BACKEND_QUEUE_MODULE, BACKEND_QUEUE_CONNECTION,
60
                                    BACKEND_QUOTA, BACKEND_VERSIONING)
61
from pithos.backends import connect_backend
62
from pithos.backends.base import NotAllowedError, QuotaError
63

    
64
import logging
65
import re
66
import hashlib
67
import uuid
68
import decimal
69

    
70

    
71
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
72

    
73

    
74
class UTC(tzinfo):
    """A fixed tzinfo implementation for UTC (zero offset, no DST)."""

    # Shared zero delta returned for both the UTC offset and the DST offset.
    _ZERO = timedelta(0)

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return self._ZERO

    def tzname(self, dt):
        """Return the timezone name."""
        return 'UTC'

    def dst(self, dt):
        """Return the daylight saving adjustment, which is always zero."""
        return self._ZERO
83

    
84
def json_encode_decimal(obj):
    """Serialize decimal.Decimal values as strings.

    Raises TypeError for any other type of object.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
88

    
89
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone.

    The datetime 'd' is tagged with the UTC timezone (its field values are
    not converted) before being formatted.
    """
    tagged = d.replace(tzinfo=UTC())
    return tagged.isoformat()
93

    
94
def rename_meta_key(d, old, new):
    """Move the value stored under key 'old' to key 'new', in place.

    Does nothing if 'old' is not a key of 'd'.
    """
    if old in d:
        value = d[old]
        d[new] = value
        # Delete after assigning, exactly as before (matters when old == new).
        del d[old]
99

    
100
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format the 'last_modified' timestamp as an ISO8601 string.

    Note that 'd' itself is modified: its 'last_modified' entry (when present
    and non-empty) is replaced by the formatted string. The returned
    dictionary is a new one with the converted keys.
    """

    if 'last_modified' in d and d['last_modified']:
        # isoformat() labels the datetime as UTC without converting it, so
        # the timestamp must be broken down in UTC, not in local time.
        d['last_modified'] = isoformat(datetime.utcfromtimestamp(d['last_modified']))
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.items()])
110

    
111
def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    words = k.replace('_', '-').split('-')
    capitalized = []
    for word in words:
        capitalized.append(word.capitalize())
    return '-'.join(capitalized)
114

    
115
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict.

    Keys are reformatted with format_header_key(), after dropping the
    leading 'HTTP_' that request.META adds. A header whose name is exactly
    the prefix, with nothing after it, is skipped.
    """

    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for name, value in request.META.items():
        if not name.startswith(meta_prefix):
            continue
        if len(name) <= len(meta_prefix):
            continue
        # TODO: Document or remove '~' replacing.
        headers[format_header_key(name[5:])] = value.replace('~', '')
    return headers
121

    
122
def check_meta_headers(meta):
    """Validate the number and the sizes of the given metadata headers.

    Raises BadRequest if there are more than 90 entries, if a name is longer
    than 128 characters or if a value is longer than 256 characters.
    """
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for name, value in meta.items():
        if len(name) > 128:
            raise BadRequest('Header name too large.')
        if len(value) > 256:
            raise BadRequest('Header value too large.')
130

    
131
def get_account_headers(request):
    """Extract account metadata and group definitions from the request.

    Returns a (meta, groups) tuple. 'meta' holds the X-Account-Meta-*
    headers. 'groups' maps each lower-cased group name, taken from the
    X-Account-Group-* headers, to the list of members found in the
    comma-separated header value (spaces and empty entries are dropped).

    Raises BadRequest if a group name contains '-' or '_', or if the
    metadata fails check_meta_headers().
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)
    groups = {}
    for header, value in get_header_prefix(request, 'X-Account-Group-').items():
        # Drop the 16-character 'X-Account-Group-' prefix.
        group = header[16:].lower()
        if '-' in group or '_' in group:
            raise BadRequest('Bad characters in group name')
        members = value.replace(' ', '').split(',')
        groups[group] = [member for member in members if member != '']
    return meta, groups
143

    
144
def put_account_headers(response, meta, groups, policy):
    """Set the account related headers on the response.

    'meta' must contain a 'modified' timestamp; 'count', 'bytes',
    'until_timestamp' and any X-Account-Meta-* entries are optional.
    'groups' maps group names to lists of members and 'policy' maps policy
    names to values; one header is emitted per entry of each.
    """
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for name in meta.keys():
        if name.startswith('X-Account-Meta-'):
            header = smart_str(name, strings_only=True)
            response[header] = smart_str(meta[name], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for group, members in groups.items():
        group = smart_str(group, strings_only=True)
        header = format_header_key('X-Account-Group-' + group)
        response[header] = smart_str(','.join(members), strings_only=True)
    for name, value in policy.items():
        header = smart_str(format_header_key('X-Account-Policy-' + name), strings_only=True)
        response[header] = smart_str(value, strings_only=True)
161

    
162
def get_container_headers(request):
    """Extract container metadata and policy from the request.

    Returns a (meta, policy) tuple. 'meta' holds the X-Container-Meta-*
    headers. 'policy' maps the lower-cased policy names, taken from the
    X-Container-Policy-* headers, to their values with spaces removed.

    Raises BadRequest if the metadata fails check_meta_headers().
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)
    policy = {}
    for header, value in get_header_prefix(request, 'X-Container-Policy-').items():
        # Drop the 19-character 'X-Container-Policy-' prefix.
        policy[header[19:].lower()] = value.replace(' ', '')
    return meta, policy
167

    
168
def put_container_headers(request, response, meta, policy):
    """Set the container related headers on the response.

    'meta' must contain 'modified' and 'object_meta'; 'count', 'bytes',
    'until_timestamp' and any X-Container-Meta-* entries are optional.
    The block size and hash algorithm are read from request.backend.
    One X-Container-Policy-* header is emitted per entry of 'policy'.
    """
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for name in meta.keys():
        if name.startswith('X-Container-Meta-'):
            header = smart_str(name, strings_only=True)
            response[header] = smart_str(meta[name], strings_only=True)

    # List the object meta keys in use, without the 14-character
    # 'X-Object-Meta-' prefix.
    object_keys = []
    for name in meta['object_meta']:
        if name.startswith('X-Object-Meta-'):
            object_keys.append(smart_str(name, strings_only=True)[14:])
    response['X-Container-Object-Meta'] = ','.join(object_keys)

    backend = request.backend
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for name, value in policy.items():
        header = smart_str(format_header_key('X-Container-Policy-' + name), strings_only=True)
        response[header] = smart_str(value, strings_only=True)
184

    
185
def get_object_headers(request):
    """Extract the object related headers from the request.

    Returns a (content_type, meta, sharing, public) tuple. 'meta' holds the
    X-Object-Meta-* headers plus Content-Encoding, Content-Disposition and
    X-Object-Manifest when these are present and non-empty. 'sharing' and
    'public' are the results of get_sharing() and get_public().

    Raises BadRequest if the metadata fails check_meta_headers().
    """
    content_type = request.META.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)
    extra_headers = (
        ('Content-Encoding', 'HTTP_CONTENT_ENCODING'),
        ('Content-Disposition', 'HTTP_CONTENT_DISPOSITION'),
        ('X-Object-Manifest', 'HTTP_X_OBJECT_MANIFEST'),
    )
    for meta_key, environ_key in extra_headers:
        value = request.META.get(environ_key)
        if value:
            meta[meta_key] = value
    return content_type, meta, get_sharing(request), get_public(request)
196

    
197
def put_object_headers(response, meta, restricted=False):
    """Set the object related headers on the response.

    ETag, Content-Length, Content-Type and Last-Modified are always set.
    If 'restricted' is true, only Content-Encoding and Content-Disposition
    are added on top of them (when present in 'meta'). Otherwise the hash,
    UUID, modifier and version headers, all X-Object-Meta-* entries and the
    manifest/sharing/public headers are included as well.
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if restricted:
        optional = ('Content-Encoding', 'Content-Disposition')
    else:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        for name in meta.keys():
            if name.startswith('X-Object-Meta-'):
                header = smart_str(name, strings_only=True)
                response[header] = smart_str(meta[name], strings_only=True)
        optional = ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
                    'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                    'X-Object-Public')
    # Headers that are copied verbatim from meta, only when available.
    for name in optional:
        if name in meta:
            response[name] = smart_str(meta[name], strings_only=True)
219

    
220
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The manifest value is read as 'container/prefix'. The sizes of all the
    objects listed by the backend under that prefix are summed into
    meta['bytes'], and meta['checksum'] is set to the MD5 of their
    concatenated checksums.

    This is a best-effort operation: if anything goes wrong while resolving
    the manifest, 'meta' is left untouched.
    """

    if 'X-Object-Manifest' not in meta:
        return

    etag = ''
    total_bytes = 0
    try:
        src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(request.user_uniq, v_account,
                            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(request.user_uniq,
                                    v_account, src_container, x[0], 'pithos', x[1])
            etag += src_meta['checksum']
            total_bytes += src_meta['bytes']
    except Exception:
        # Ignore errors, but let KeyboardInterrupt/SystemExit propagate.
        return
    meta['bytes'] = total_bytes
    md5 = hashlib.md5()
    md5.update(etag)
    meta['checksum'] = md5.hexdigest().lower()
242

    
243
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add the sharing related entries to the object's metadata.

    'permissions' is either None or an (allowed, perm_path, perms) tuple,
    where 'perms' maps 'read' and/or 'write' to lists of users. Nothing is
    changed when 'permissions' is None or 'perms' is empty. Otherwise:

    - X-Object-Sharing is always set from 'perms';
    - X-Object-Shared-By is set to 'perm_path' when it differs from the
      path of this object;
    - X-Object-Allowed-To is set to 'allowed' when the requesting user is
      not the account owner.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return

    parts = []
    for access in ('read', 'write'):
        users = ','.join(perms.get(access, []))
        if users:
            parts.append(access + '=' + users)
    meta['X-Object-Sharing'] = '; '.join(parts)

    object_path = '/'.join((v_account, v_container, v_object))
    if object_path != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
261

    
262
def update_public_meta(public, meta):
    """Set X-Object-Public in 'meta' if 'public' has a true value.

    The header value is '/public/' followed by encode_url(public).
    """
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
266

    
267
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.

    Raises NotModified if the resource has not changed after the date given
    in If-Modified-Since, and PreconditionFailed if it has changed after
    the date given in If-Unmodified-Since. Headers that are missing, or
    for which parse_http_date_safe() returns None, are ignored.
    """

    if 'modified' not in meta:
        return # TODO: Always return?

    since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if since is not None:
        since = parse_http_date_safe(since)
        if since is not None and int(meta['modified']) <= since:
            raise NotModified('Resource has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        unmodified_since = parse_http_date_safe(unmodified_since)
        if unmodified_since is not None and int(meta['modified']) > unmodified_since:
            raise PreconditionFailed('Resource has been modified')
284

    
285
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    An empty meta['checksum'] is treated as "resource does not exist".

    Raises PreconditionFailed if If-Match is given and the resource does
    not exist or its ETag is not among the ones listed. If If-None-Match is
    given and matches an existing resource, raises NotModified for HEAD/GET
    requests and PreconditionFailed for every other method.
    """

    etag = meta['checksum'] or None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            accepted = [x.lower() for x in parse_etags(if_match)]
            if etag not in accepted:
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    # TODO: If this passes, must ignore If-Modified-Since header.
    if if_none_match is not None and etag is not None:
        if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
            # TODO: Continue if an If-Modified-Since header is present.
            if request.method in ('HEAD', 'GET'):
                raise NotModified('Resource ETag matches')
            raise PreconditionFailed('Resource exists or ETag matches')
308

    
309
def split_container_object_string(s):
    """Split a '/container/object' string into a (container, object) tuple.

    The string must start with '/' and have a non-empty object part after
    the first '/' that follows the container; otherwise ValueError is
    raised. The object part may itself contain slashes.
    """
    if len(s) == 0 or s[0] != '/':
        raise ValueError
    container, separator, name = s[1:].partition('/')
    if separator == '' or name == '':
        raise ValueError
    return container, name
317

    
318
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object.

    The new content type, metadata, permissions and public flag are read
    from the request headers. The Content-Type header is discarded first
    if the 'ignore_content_type' query parameter is given. When copying,
    the X-Source-Version header selects the source version.

    Returns the version id reported by the backend. Backend errors are
    translated to Forbidden, ItemNotFound, BadRequest or
    RequestEntityTooLarge.
    """

    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del(request.META['CONTENT_TYPE'])
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            operation = request.backend.move_object
            extra_args = ()
        else:
            operation = request.backend.copy_object
            # Only copying accepts a source version.
            extra_args = (src_version,)
        version_id = operation(request.user_uniq, src_account, src_container, src_name,
                               dest_account, dest_container, dest_name,
                               content_type, 'pithos', meta, False, permissions,
                               *extra_args)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')

    if public is None:
        return version_id
    try:
        request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except NameError:
        raise ItemNotFound('Object does not exist')
    return version_id
350

    
351
def get_int_parameter(p):
    """Convert a parameter to a non-negative integer.

    Returns None if 'p' is None, cannot be parsed as an integer or is
    negative.
    """
    if p is None:
        return p
    try:
        value = int(p)
    except ValueError:
        return None
    if value < 0:
        return None
    return value
360

    
361
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises LengthRequired if the header is missing or invalid.
    """
    raw_length = request.META.get('CONTENT_LENGTH')
    content_length = get_int_parameter(raw_length)
    if content_length is not None:
        return content_length
    raise LengthRequired('Missing or invalid Content-Length header')
366

    
367
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    'size' is the total size of the object. Following the HTTP range
    semantics, a last-byte position beyond the end of the object is clamped
    to the last byte, and a suffix range longer than the object selects the
    whole object. Ranges that start at or after the end of the object are
    still returned (with a non-positive length), for the caller to reject.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # The pattern is the same for every range spec; compile it only once.
    p = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        m = p.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    # Syntactically invalid spec; ignore the whole header.
                    return None
                # A last-byte position past the end means "up to the end".
                upto = min(upto, size - 1)
                ret.append((offset, upto - offset + 1))
            else:
                ret.append((offset, size - offset))
        else:
            # Suffix range: the last 'upto' bytes, at most the whole object.
            length = min(int(upto), size)
            ret.append((size - length, length))

    return ret
403

    
404
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None

    m = re.match(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if not m:
        if header == 'bytes */*':
            return (None, None, None)
        return None

    offset = int(m.group('offset'))
    upto = int(m.group('upto')) if m.group('upto') != '' else None
    total = int(m.group('total')) if m.group('total') != '*' else None

    # Ignore ranges that are inconsistent with themselves or with the total.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
443

    
444
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Returns None if the header is missing, or a dictionary that maps 'read'
    and/or 'write' to lists of lower-cased user names ('*' alone stands for
    everybody). A user with both kinds of access is only listed under
    'write'. An empty header gives an empty dictionary.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '').replace(' ', '')

    ret = {}
    if permissions == '':
        return ret
    for perm in permissions.split(';'):
        if perm.startswith('read='):
            access = 'read'
        elif perm.startswith('write='):
            access = 'write'
        else:
            raise BadRequest('Bad X-Object-Sharing header value')
        # Skip the 'read=' or 'write=' part and remove duplicate users.
        names = perm[len(access) + 1:].split(',')
        users = list(set([v.replace(' ', '').lower() for v in names]))
        if '' in users:
            users.remove('')
        if '*' in users:
            users = ['*']
        if len(users) == 0:
            raise BadRequest('Bad X-Object-Sharing header value')
        ret[access] = users

    # Keep duplicates only in write list.
    if 'read' in ret:
        writers = ret.get('write', [])
        readers = [x for x in ret['read'] if x == '*' or x not in writers]
        if readers:
            ret['read'] = readers
        else:
            del(ret['read'])

    return ret
490

    
491
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None if the header is missing, True for 'true' and False for
    'false' or an empty value (spaces and letter case are ignored).

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    value = public.replace(' ', '').lower()
    if value == 'true':
        return True
    if value in ('false', ''):
        return False
    raise BadRequest('Bad X-Object-Public header value')
507

    
508
def raw_input_socket(request):
    """Return the socket for reading the rest of the request.

    Under mod_python this is the underlying request object; otherwise it is
    the 'wsgi.input' stream. Raises NotImplemented if neither is available.
    """

    server_software = request.META.get('SERVER_SOFTWARE')
    is_mod_python = server_software and server_software.startswith('mod_python')
    if is_mod_python:
        return request._req
    try:
        return request.environ['wsgi.input']
    except KeyError:
        pass
    raise NotImplemented('Unknown server software')
517

    
518
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB


def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Raises BadRequest if more than MAX_UPLOAD_SIZE bytes are sent, if a
    chunk size cannot be parsed or if the input ends prematurely.
    """

    sock = raw_input_socket(request)
    if length < 0: # Chunked transfers
        # 'length' only flags the chunked mode; the amount of data read so
        # far is counted separately, so that the size limit can be enforced.
        received = 0

        # Small version (server does the dechunking).
        server_software = request.META.get('SERVER_SOFTWARE') or ''
        if request.environ.get('mod_wsgi.input_chunked', None) or server_software.startswith('gunicorn'):
            while True:
                data = sock.read(blocksize)
                if not data:
                    return
                received += len(data)
                if received > MAX_UPLOAD_SIZE:
                    raise BadRequest('Maximum size is reached')
                yield data

        # Long version (do the dechunking).
        data = ''
        while True:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    char = sock.read(1)
                    if not char:
                        # Premature end of input; whatever has been read is
                        # validated by the size parsing below.
                        break
                    chunk_length += char
            # Drop any chunk extensions.
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                # int() ignores the surrounding whitespace (including CRLF).
                chunk_length = int(chunk_length, 16)
            except Exception:
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            if chunk_length < 0:
                raise BadRequest('Bad chunk size')
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                if not chunk:
                    # Premature end of input; same error as in the
                    # non-chunked case below.
                    raise BadRequest()
                chunk_length -= len(chunk)
                received += len(chunk)
                if received > MAX_UPLOAD_SIZE:
                    raise BadRequest('Maximum size is reached')
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
583

    
584
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    The uploaded data is buffered and stored in the backend block by block.
    The resulting UploadedFile carries the hashes of the stored blocks in
    its 'hashmap' attribute and the MD5 of the data in its 'etag' attribute.
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store the first 'length' buffered bytes as one block.

        Does nothing if fewer than 'length' bytes are buffered.
        """
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the buffer, the MD5 state and the file for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer the incoming data and store all the complete blocks."""
        self.data += raw_data
        self.file.size += len(raw_data)
        block_size = self.backend.block_size
        # A chunk may span more than one block; flush every complete block,
        # so that the data left for file_complete() is less than a block.
        while block_size > 0 and len(self.data) >= block_size:
            self.put_data(block_size)
        return None

    def file_complete(self, file_size):
        """Store the remaining data as the last block and return the file."""
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
617

    
618
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.
    The object may consist of several files, given by their 'sizes' and
    'hashmaps'. With more than one range, every part is preceded by a
    header built with 'boundary' and a footer closes the output.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        # Position in the files and the most recently fetched block.
        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        # The range being served; -1 means that nothing has been sent yet.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of data of the current range.

        Raises StopIteration when the current range has been consumed.
        """
        if self.length <= 0:
            raise StopIteration

        block_size = self.backend.block_size

        # Get the file for the current offset.
        current_size = self.sizes[self.file_index]
        while self.offset >= current_size:
            self.offset -= current_size
            self.file_index += 1
            current_size = self.sizes[self.file_index]
        hashmap = self.hashmaps[self.file_index]

        # Get the block for the current position.
        self.block_index = int(self.offset / block_size)
        wanted_hash = hashmap[self.block_index]
        if self.block_hash != wanted_hash:
            self.block_hash = wanted_hash
            try:
                self.block = self.backend.get_block(self.block_hash)
            except NameError:
                raise ItemNotFound('Block does not exist')

        # Get the data from the block; the last block of a file may hold
        # less than a full block of data.
        start = self.offset % block_size
        available = block_size
        tail = current_size % block_size
        if self.block_index == len(hashmap) - 1 and tail:
            available = tail
        count = min(self.length, available - start)
        data = self.block[start:start + count]
        self.offset += count
        self.length -= count
        return data

    def next(self):
        total_ranges = len(self.ranges)
        if total_ranges == 1:
            return self.part_iterator()
        if self.range_index == total_ranges:
            raise StopIteration
        if self.range_index != -1:
            try:
                return self.part_iterator()
            except StopIteration:
                # The current range is done; move on to the next one.
                pass

        self.range_index += 1
        if self.range_index < total_ranges:
            # Part header.
            self.offset, self.length = self.ranges[self.range_index]
            self.file_index = 0
            lines = []
            if self.range_index > 0:
                lines.append('')
            lines.extend([
                '--' + self.boundary,
                'Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size),
                'Content-Transfer-Encoding: binary',
                '',
                '',
            ])
            return '\r\n'.join(lines)

        # Footer.
        return '\r\n'.join(['', '--' + self.boundary + '--', ''])
705

    
706
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data.

    Honors the Range and If-Range request headers: replies with 200 and the
    whole object, or with 206 and the requested range(s), using a
    multipart/byteranges body when more than one range is requested.
    'public' is passed on to put_object_headers() as its 'restricted' flag.

    Raises RangeNotSatisfiable if a requested range exceeds the object.
    """

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        status = 200
    else:
        for offset, length in ranges:
            if (length <= 0 or length > size or
                    offset < 0 or offset >= size or
                    offset + length > size):
                raise RangeNotSatisfiable('Requested range exceeds object limits')
        status = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # Modification time has passed instead.
                unchanged = parse_http_date(if_range) == meta['modified']
            except ValueError:
                # Not a date, so it is compared as an ETag.
                unchanged = if_range == meta['checksum']
            if not unchanged:
                ranges = [(0, size)]
                status = 200

    multipart = status == 206 and len(ranges) > 1
    boundary = uuid.uuid4().hex if multipart else ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=status)
    put_object_headers(response, meta, public)
    if status != 206:
        return response

    if len(ranges) == 1:
        offset, length = ranges[0]
        response['Content-Length'] = length # Update with the correct length.
        response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
    else:
        del(response['Content-Length'])
        response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response
752

    
753
def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset.

    At most one block is touched: ``data`` is cut at the end of the block
    that contains ``offset``.

    Args:
        request: Object whose ``backend`` provides ``block_size``,
            ``update_block()`` and ``put_block()``.
        hashmap: List of block hashes; updated in place.
        data: The data to write.
        offset: Offset of the first byte of ``data``.

    Returns:
        The amount of data actually written, which may be less than
        ``len(data)``.
    """

    block_size = request.backend.block_size
    # divmod() uses floor division, so the block index stays an exact
    # integer regardless of how the "/" operator is defined for ints.
    block_index, block_offset = divmod(offset, block_size)
    length = min(len(data), block_size - block_offset)
    chunk = data[:length]
    if block_index < len(hashmap):
        # The block already exists: patch it and store the new hash.
        hashmap[block_index] = request.backend.update_block(hashmap[block_index], chunk, block_offset)
    else:
        # New block: zero-fill up to the offset inside the block.
        # NOTE(review): the hash is appended, so this presumably relies on
        # block_index == len(hashmap); a larger index would leave the new
        # block at the wrong position - verify against callers.
        hashmap.append(request.backend.put_block(('\x00' * block_offset) + chunk))
    return length  # Return amount of data written.
764

    
765
def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    Args:
        backend: Object providing ``block_size`` and ``get_block(hash)``.
        hashmap: Sequence of block hashes, in object order.
        size: Size of the object; used to cut the padding off the last
            block.

    Returns:
        The lowercase hexadecimal MD5 digest of the blocks' data.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = backend.block_size
    last_index = len(hashmap) - 1
    for bi, block_hash in enumerate(hashmap):
        data = backend.get_block(block_hash)  # Blocks come in padded.
        if bi == last_index:
            tail = size % bs
            # A zero remainder means the last block is completely full
            # (unless the object is empty), so it must not be truncated
            # to nothing.
            if tail or not size:
                data = data[:tail]
        md5.update(data)
    return md5.hexdigest().lower()
777

    
778
def simple_list_response(request, l):
    """Serialize a list of items according to ``request.serialization``.

    Args:
        request: Object whose ``serialization`` attribute selects the
            output format.
        l: The items to serialize.

    Returns:
        For 'text', the items joined one per line with a trailing newline;
        for 'xml', the rendered 'items.xml' template; for 'json', the JSON
        dump of the list. ``None`` for any other serialization.
    """

    serialization = request.serialization
    if serialization == 'text':
        return '\n'.join(l) + '\n'
    if serialization == 'xml':
        return render_to_string('items.xml', {'items': l})
    if serialization == 'json':
        return json.dumps(l)
    return None
785

    
786
def get_backend():
    """Connect to the storage backend and apply the default policy.

    The connection parameters come from the module-level ``BACKEND_*``
    settings; the backend's default 'quota' and 'versioning' policy values
    are then set from ``BACKEND_QUOTA`` and ``BACKEND_VERSIONING``.

    Returns:
        The object returned by ``connect_backend()``.
    """

    backend = connect_backend(db_module=BACKEND_DB_MODULE,
                              db_connection=BACKEND_DB_CONNECTION,
                              block_module=BACKEND_BLOCK_MODULE,
                              block_path=BACKEND_BLOCK_PATH,
                              queue_module=BACKEND_QUEUE_MODULE,
                              queue_connection=BACKEND_QUEUE_CONNECTION)
    backend.default_policy['quota'] = BACKEND_QUOTA
    backend.default_policy['versioning'] = BACKEND_VERSIONING
    return backend
796

    
797
def update_request_headers(request):
    """Check the HTTP headers of the request and URL-decode them in place.

    Only ``request.META`` entries whose key starts with 'HTTP_' are
    considered. An entry with a '%' in its key or value is replaced by one
    with the unquoted key and the unquoted value (passed through
    ``smart_unicode(..., strings_only=True)``).

    Raises:
        BadRequest: If a header key or value cannot be decoded as ASCII.
    """

    # Handle URL-encoded keys and values.
    # Iterate over a snapshot, because request.META is modified in the loop.
    headers = [(k, v) for k, v in request.META.items() if k.startswith('HTTP_')]
    for k, v in headers:
        try:
            # decode() is used purely as an ASCII check; the result is
            # discarded. Assumes keys and values are byte strings.
            k.decode('ascii')
            v.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' in k or '%' in v:
            del request.META[k]
            request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)
809

    
810
def update_response_headers(request, response):
    """Finalize the headers of a response before it is sent.

    * Sets ``Content-Type`` from ``request.serialization`` ('xml' or
      'json'); otherwise falls back to plain text when the response has an
      empty ``Content-Type``.
    * Sets ``Content-Length`` from the response content, unless it is
      already present or the response is ``multipart/byteranges``.
    * URL-encodes the names and values of all ``X-Account-*``,
      ``X-Container-*``, ``X-Object-*`` and ``Content-*`` headers.

    The response is modified in place; nothing is returned.
    """

    if request.serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif request.serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if (not response.has_header('Content-Length') and
        not (response.has_header('Content-Type') and
             response['Content-Type'].startswith('multipart/byteranges'))):
        response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    quoted_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    # Take a snapshot first: headers are deleted and re-added in the loop.
    for k, v in list(response.items()):
        if k.startswith(quoted_prefixes):
            del response[k]
            response[quote(k)] = quote(v, safe='/=,:@; ')
830

    
831
def render_fault(request, fault):
    """Build a plain-text HttpResponse that describes a fault.

    The body is the fault's message, followed by its details (if any)
    after an empty line; the status is ``fault.code``. As a side effect
    ``request.serialization`` is forced to 'text'.

    When the fault is an ``InternalServerError`` and ``settings.DEBUG`` is
    set, the fault's details are replaced by the formatted traceback of
    the exception currently being handled.

    Returns:
        The HttpResponse, after ``update_response_headers()`` was applied.
    """

    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # format_exc() always formats the exception being handled; its only
        # parameter is a traceback depth limit, so the fault must not be
        # passed to it.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
842

    
843
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.

    The 'format' query parameter is checked first; if it names neither
    'json' nor 'xml', the entries of the Accept header are scanned in
    order and the first recognized media type wins. Media types are
    matched case-insensitively and any parameters (e.g. ';q=0.9') are
    ignored. The result defaults to 'text'.
    """

    if not format_allowed:
        return 'text'

    requested = request.GET.get('format')
    if requested == 'json':
        return 'json'
    elif requested == 'xml':
        return 'xml'

    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        # Keep only the media type: drop parameters and surrounding
        # whitespace, and normalize the case.
        media_type = item.partition(';')[0].strip().lower()
        if media_type == 'application/json':
            return 'json'
        elif media_type in ('application/xml', 'text/xml'):
            return 'xml'

    return 'text'
866

    
867
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method.

    The wrapped view is called only after the request passed a few checks
    and was given the ``serialization`` and ``backend`` attributes. Faults
    raised along the way are rendered with ``render_fault()``; any other
    exception is logged and rendered as an ``InternalServerError``. The
    backend, if one was attached to the request, is always closed.

    Args:
        http_method: If set, a request with a different method is answered
            with a ``BadRequest`` fault.
        format_allowed: Passed to ``request_serialization()``.
        user_required: If true, a request without a ``user`` (missing or
            None) is answered with an ``Unauthorized`` fault.

    Returns:
        A decorator for view functions taking ``(request, *args, **kwargs)``.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except Exception as e:
                # Exception (not BaseException), so that SystemExit and
                # KeyboardInterrupt are not turned into a 500 reply.
                # The message is formatted by the logging module, so a
                # formatting problem cannot raise inside this handler.
                logger.exception('Unexpected error: %s', e)
                fault = InternalServerError('Unexpected error')
                return render_fault(request, fault)
            finally:
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator