Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 0af13d50

History | View | Annotate | Download (35 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52

    
53
from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
54
                                Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
55
                                RangeNotSatisfiable, InternalServerError, NotImplemented)
56
from pithos.api.short_url import encode_url
57
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
58
                                    BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
59
                                    BACKEND_BLOCK_UMASK,
60
                                    BACKEND_QUEUE_MODULE, BACKEND_QUEUE_CONNECTION,
61
                                    BACKEND_QUOTA, BACKEND_VERSIONING)
62
from pithos.backends import connect_backend
63
from pithos.backends.base import NotAllowedError, QuotaError
64

    
65
import logging
66
import re
67
import hashlib
68
import uuid
69
import decimal
70

    
71

    
72
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
73

    
74

    
75
class UTC(tzinfo):
    """Concrete ``tzinfo`` describing UTC: zero offset, no DST, name 'UTC'."""

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return timedelta(hours=0)

    def dst(self, dt):
        """Return the daylight-saving adjustment, which is always zero."""
        return timedelta(hours=0)

    def tzname(self, dt):
        """Return the zone name."""
        return 'UTC'
84

    
85
def json_encode_decimal(obj):
    """Return the string form of a ``decimal.Decimal``.

    Any other object raises ``TypeError`` with a "not JSON serializable"
    message. The signature has the shape of a ``default=`` hook for a JSON
    encoder, although no call site is visible in this part of the file.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
89

    
90
def isoformat(d):
    """Return an ISO8601 date string for ``d`` that includes a timezone.

    ``d`` is stamped with ``UTC`` via ``replace(tzinfo=...)``: the clock
    fields are not converted, only labelled, so the caller is expected to
    pass a value that is already expressed in UTC.
    """
    stamped = d.replace(tzinfo=UTC())
    return stamped.isoformat()
94

    
95
def rename_meta_key(d, old, new):
    """Rename key ``old`` to ``new`` in dictionary ``d``, in place.

    Nothing happens when ``old`` is absent. An existing ``new`` entry is
    overwritten. Renaming a key to itself is a no-op (previously the
    assign-then-delete sequence removed the entry altogether).
    """
    if old not in d or old == new:
        return
    d[new] = d.pop(old)
100

    
101
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format the 'last_modified' timestamp as an ISO8601 string.

    Note that a truthy ``d['last_modified']`` is replaced in ``d`` itself
    before the new dictionary is built; the returned dictionary is a
    separate object with the normalized keys.
    """
    if 'last_modified' in d and d['last_modified']:
        # isoformat() labels the value as UTC without converting it, so the
        # naive datetime must already be in UTC. fromtimestamp() would give
        # local time and produce a wrong instant on any non-UTC server.
        d['last_modified'] = isoformat(datetime.utcfromtimestamp(d['last_modified']))
    return dict((k.lower().replace('-', '_'), v) for k, v in d.items())
111

    
112
def format_header_key(k):
    """Convert underscores to dashes and capitalize each dash-separated word.

    For example ``'x_account_meta-foo'`` becomes ``'X-Account-Meta-Foo'``.
    """
    words = k.replace('_', '-').split('-')
    return '-'.join(word.capitalize() for word in words)
115

    
116
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict.

    ``prefix`` is given in header form (e.g. ``'X-Account-Meta-'``) and is
    matched against ``request.META`` in its ``HTTP_X_ACCOUNT_META_`` form.
    A key equal to the bare prefix is skipped. Returned keys are reformatted
    with format_header_key(); '~' characters are stripped from the values.
    """
    env_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for env_key, value in request.META.items():
        if not env_key.startswith(env_prefix):
            continue
        if len(env_key) <= len(env_prefix):
            continue
        # env_key[5:] drops the leading 'HTTP_'.
        # TODO: Document or remove '~' replacing.
        headers[format_header_key(env_key[5:])] = value.replace('~', '')
    return headers
122

    
123
def check_meta_headers(meta):
    """Enforce size limits on a dictionary of metadata headers.

    Raises BadRequest when there are more than 90 entries, when a name is
    longer than 128 characters or when a value is longer than 256
    characters. Returns None when every limit is respected.
    """
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for name, value in meta.items():
        if len(name) > 128:
            raise BadRequest('Header name too large.')
        if len(value) > 256:
            raise BadRequest('Header value too large.')
131

    
132
def get_account_headers(request):
    """Extract account metadata and group definitions from the request.

    Returns a ``(meta, groups)`` tuple. ``meta`` holds the X-Account-Meta-*
    headers (validated with check_meta_headers()). ``groups`` maps each
    lower-cased group name, taken from the X-Account-Group-* headers, to
    the list of its comma-separated members with spaces and empty entries
    removed. Raises BadRequest when a group name contains '-' or '_'.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)

    group_prefix = 'X-Account-Group-'
    groups = {}
    for header, value in get_header_prefix(request, group_prefix).items():
        name = header[len(group_prefix):].lower()
        if '-' in name or '_' in name:
            raise BadRequest('Bad characters in group name')
        members = value.replace(' ', '').split(',')
        groups[name] = [member for member in members if member != '']
    return meta, groups
144

    
145
def put_account_headers(response, meta, groups, policy):
    """Copy account information into the response headers.

    ``meta['modified']`` is required (Last-Modified); 'count', 'bytes' and
    'until_timestamp' are emitted only when present. Every X-Account-Meta-*
    entry of ``meta`` is copied through, each group becomes an
    X-Account-Group-<Name> header with comma-joined members, and each
    policy entry becomes an X-Account-Policy-<Name> header.
    """
    for header, key in (('X-Account-Container-Count', 'count'),
                        ('X-Account-Bytes-Used', 'bytes')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))

    for key in list(meta.keys()):
        if key.startswith('X-Account-Meta-'):
            response[smart_str(key, strings_only=True)] = smart_str(meta[key], strings_only=True)

    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))

    for name, members in groups.items():
        header = format_header_key('X-Account-Group-' + smart_str(name, strings_only=True))
        response[header] = smart_str(','.join(members), strings_only=True)

    for name, value in policy.items():
        header = smart_str(format_header_key('X-Account-Policy-' + name), strings_only=True)
        response[header] = smart_str(value, strings_only=True)
162

    
163
def get_container_headers(request):
    """Extract container metadata and policy from the request.

    Returns a ``(meta, policy)`` tuple. ``meta`` holds the
    X-Container-Meta-* headers (validated with check_meta_headers()).
    ``policy`` maps the lower-cased remainder of each X-Container-Policy-*
    header name to its value with spaces removed.
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)

    policy = {}
    for header, value in get_header_prefix(request, 'X-Container-Policy-').items():
        # 19 == len('X-Container-Policy-')
        policy[header[19:].lower()] = value.replace(' ', '')
    return meta, policy
168

    
169
def put_container_headers(request, response, meta, policy):
    """Copy container information into the response headers.

    ``meta['modified']`` and ``meta['object_meta']`` are required; 'count',
    'bytes' and 'until_timestamp' are emitted only when present. The names
    in ``meta['object_meta']`` that start with 'X-Object-Meta-' are listed,
    without that prefix, in X-Container-Object-Meta. Block size and hash
    algorithm are read from ``request.backend``.
    """
    for header, key in (('X-Container-Object-Count', 'count'),
                        ('X-Container-Bytes-Used', 'bytes')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))

    for key in list(meta.keys()):
        if key.startswith('X-Container-Meta-'):
            response[smart_str(key, strings_only=True)] = smart_str(meta[key], strings_only=True)

    object_meta_names = []
    for key in meta['object_meta']:
        if key.startswith('X-Object-Meta-'):
            # 14 == len('X-Object-Meta-')
            object_meta_names.append(smart_str(key, strings_only=True)[14:])
    response['X-Container-Object-Meta'] = ','.join(object_meta_names)

    backend = request.backend
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm

    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))

    for name, value in policy.items():
        header = smart_str(format_header_key('X-Container-Policy-' + name), strings_only=True)
        response[header] = smart_str(value, strings_only=True)
185

    
186
def get_object_headers(request):
    """Extract the object-related headers from the request.

    Returns ``(content_type, meta, sharing, public)``: the CONTENT_TYPE
    value (or None), the X-Object-Meta-* headers (validated with
    check_meta_headers()) extended with any non-empty Content-Encoding,
    Content-Disposition and X-Object-Manifest headers, and the results of
    get_sharing() and get_public().
    """
    environ = request.META
    content_type = environ.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)

    passthrough = (
        ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
        ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'),
    )
    for env_key, meta_key in passthrough:
        value = environ.get(env_key)
        if value:
            meta[meta_key] = value

    sharing = get_sharing(request)
    public = get_public(request)
    return content_type, meta, sharing, public
197

    
198
def put_object_headers(response, meta, restricted=False):
    """Copy object information into the response headers.

    ETag, Content-Length, Content-Type (defaulting to
    'application/octet-stream') and Last-Modified are always set. With
    ``restricted`` true only Content-Encoding and Content-Disposition are
    added on top, when present in ``meta``. Otherwise the hash, UUID,
    modifier, version details, all X-Object-Meta-* entries and the optional
    manifest, sharing and public headers are emitted as well.
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))

    if restricted:
        optional = ('Content-Encoding', 'Content-Disposition')
    else:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        for key in list(meta.keys()):
            if key.startswith('X-Object-Meta-'):
                response[smart_str(key, strings_only=True)] = smart_str(meta[key], strings_only=True)
        optional = ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
                    'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                    'X-Object-Public')

    for key in optional:
        if key in meta:
            response[key] = smart_str(meta[key], strings_only=True)
220

    
221
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The manifest value is split into a container and a name prefix. Every
    object listed under that prefix contributes its 'bytes' to the total
    size and its 'checksum' to a concatenation whose MD5 becomes the new
    checksum. ``meta['bytes']`` and ``meta['checksum']`` are overwritten
    in place.

    This is best effort: if the manifest cannot be parsed or the backend
    lookups fail, ``meta`` is left untouched. Only ``Exception`` subclasses
    are ignored, so KeyboardInterrupt and SystemExit still propagate.
    """
    if 'X-Object-Manifest' not in meta:
        return

    total_bytes = 0
    try:
        src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(request.user_uniq, v_account,
                                               src_container, prefix=src_name, virtual=False)
        checksums = []
        for x in objects:
            src_meta = request.backend.get_object_meta(request.user_uniq,
                                                       v_account, src_container, x[0], 'pithos', x[1])
            checksums.append(src_meta['checksum'])
            total_bytes += src_meta['bytes']
        # Joined inside the try so a non-string checksum is ignored like
        # any other lookup problem.
        etag = ''.join(checksums)
    except Exception:
        # Ignore errors.
        return

    meta['bytes'] = total_bytes
    md5 = hashlib.md5()
    md5.update(etag)
    meta['checksum'] = md5.hexdigest().lower()
243

    
244
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add the sharing-related entries to ``meta``.

    ``permissions`` is either None or an ``(allowed, perm_path, perms)``
    tuple, where ``perms`` maps 'read'/'write' to lists of names. Nothing
    is changed when ``permissions`` is None or ``perms`` is empty.
    Otherwise 'X-Object-Sharing' is set to 'read=...; write=...' (empty
    lists are left out), 'X-Object-Shared-By' is set when ``perm_path``
    differs from 'account/container/object', and 'X-Object-Allowed-To' is
    set when the requesting user is not ``v_account``.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return

    clauses = []
    for access in ('read', 'write'):
        names = ','.join(perms.get(access, []))
        if names:
            clauses.append(access + '=' + names)
    meta['X-Object-Sharing'] = '; '.join(clauses)

    object_path = '/'.join((v_account, v_container, v_object))
    if object_path != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
262

    
263
def update_public_meta(public, meta):
    """Set ``meta['X-Object-Public']`` when ``public`` is truthy.

    The value is '/public/' followed by ``encode_url(public)``; a falsy
    ``public`` leaves ``meta`` untouched.
    """
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
267

    
268
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.

    Raises NotModified when If-Modified-Since parses to a date that is not
    older than ``meta['modified']``, and PreconditionFailed when
    If-Unmodified-Since parses to a date older than ``meta['modified']``.
    Headers that are missing, or for which parse_http_date_safe() returns
    None, are ignored.
    """
    if 'modified' not in meta:
        return  # TODO: Always return?

    def parsed_header(name):
        # None when the header is absent; otherwise whatever
        # parse_http_date_safe() makes of it.
        raw = request.META.get(name)
        if raw is None:
            return None
        return parse_http_date_safe(raw)

    if_modified_since = parsed_header('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if int(meta['modified']) <= if_modified_since:
            raise NotModified('Resource has not been modified')

    if_unmodified_since = parsed_header('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if int(meta['modified']) > if_unmodified_since:
            raise PreconditionFailed('Resource has been modified')
285

    
286
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    A falsy ``meta['checksum']`` means the resource has no ETag.

    If-Match: raises PreconditionFailed when there is no ETag, or when the
    header is not '*' and does not list the ETag.

    If-None-Match: when the header is '*' or lists the ETag, raises
    NotModified for HEAD/GET requests and PreconditionFailed for any other
    method. It is ignored when there is no ETag.
    """
    etag = meta['checksum'] or None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            accepted = [x.lower() for x in parse_etags(if_match)]
            if etag not in accepted:
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    # TODO: If this passes, must ignore If-Modified-Since header.
    if if_none_match is None or etag is None:
        return
    if if_none_match != '*':
        rejected = [x.lower() for x in parse_etags(if_none_match)]
        if etag not in rejected:
            return
    # TODO: Continue if an If-Modified-Since header is present.
    if request.method in ('HEAD', 'GET'):
        raise NotModified('Resource ETag matches')
    raise PreconditionFailed('Resource exists or ETag matches')
309

    
310
def split_container_object_string(s):
    """Split '/container/object' into a ``(container, object)`` tuple.

    The string must start with '/'. The object part is everything after
    the second '/' and may itself contain slashes. Raises ValueError when
    the leading slash is missing, when there is no second slash or when
    the object part is empty.
    """
    if not s.startswith('/'):
        raise ValueError
    container, separator, name = s[1:].partition('/')
    if not separator or not name:
        raise ValueError
    return container, name
318

    
319
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False):
    """Copy or move an object and return the new version id.

    When the 'ignore_content_type' query parameter is given, CONTENT_TYPE
    is removed from ``request.META`` before the headers are read. A copy
    additionally passes the X-Source-Version header to the backend; a move
    does not. If an X-Object-Public header was supplied, the destination's
    public flag is updated afterwards.

    Backend errors are translated: NotAllowedError -> Forbidden,
    NameError/IndexError -> ItemNotFound, ValueError -> BadRequest,
    QuotaError -> RequestEntityTooLarge.
    """
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del request.META['CONTENT_TYPE']
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')

    try:
        common_args = (request.user_uniq, src_account, src_container, src_name,
                       dest_account, dest_container, dest_name,
                       content_type, 'pithos', meta, False, permissions)
        if move:
            version_id = request.backend.move_object(*common_args)
        else:
            version_id = request.backend.copy_object(*(common_args + (src_version,)))
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (NameError, IndexError):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')

    if public is None:
        return version_id
    try:
        request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except NameError:
        raise ItemNotFound('Object does not exist')
    return version_id
351

    
352
def get_int_parameter(p):
    """Convert ``p`` to a non-negative integer.

    Returns None when ``p`` is None, when ``int(p)`` raises ValueError or
    when the result is negative.
    """
    if p is None:
        return None
    try:
        value = int(p)
    except ValueError:
        return None
    if value < 0:
        return None
    return value
361

    
362
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises LengthRequired when the header is missing or get_int_parameter()
    rejects its value.
    """
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if content_length is not None:
        return content_length
    raise LengthRequired('Missing or invalid Content-Length header')
367

    
368
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    Spaces are removed from the header before parsing. 'a-b' yields
    (a, b - a + 1), 'a-' yields (a, size - a) and the suffix form '-n'
    yields (size - n, n). A single malformed part, or a part with a > b,
    makes the whole header ignored.

    NOTE(review): nothing is clamped to ``size`` here, so a suffix longer
    than the object produces a negative offset; callers must validate.
    """
    header = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not header.startswith('bytes='):
        return None

    part_re = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ranges = []
    for part in header[6:].split(','):
        match = part_re.match(part.strip())
        if match is None:
            return None
        first = match.group('offset')
        last = match.group('upto')

        if first == '':
            if last == '':
                return None
            # Suffix form: the final ``last`` bytes.
            suffix = int(last)
            ranges.append((size - suffix, suffix))
        elif last == '':
            # Open-ended form: from ``first`` to the end.
            start = int(first)
            ranges.append((start, size - start))
        else:
            start = int(first)
            end = int(last)
            if start > end:
                return None
            ranges.append((start, end - start + 1))

    return ranges
404

    
405
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.

    The accepted form is 'bytes <offset>-[<upto>]/<total|*>'. The header is
    ignored when offset > upto, or when offset or upto reach or exceed a
    numeric total.
    """
    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None

    match = re.match(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if match is None:
        if header == 'bytes */*':
            return (None, None, None)
        return None

    offset = int(match.group('offset'))
    raw_upto = match.group('upto')
    raw_total = match.group('total')
    upto = int(raw_upto) if raw_upto != '' else None
    total = int(raw_total) if raw_total != '*' else None

    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
444

    
445
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Returns None when the header is absent, an empty dict when it is blank,
    and otherwise a dict with optional 'read' and 'write' lists of
    lower-cased names. A list containing '*' collapses to ['*']. Names that
    appear in both lists are kept only in 'write'.

    Raises BadRequest on error: an unknown clause or an empty name list.
    """
    header = request.META.get('HTTP_X_OBJECT_SHARING')
    if header is None:
        return None

    # TODO: Document or remove '~' replacing.
    header = header.replace('~', '').replace(' ', '')

    ret = {}
    if header == '':
        return ret

    for clause in header.split(';'):
        for access in ('read', 'write'):
            marker = access + '='
            if clause.startswith(marker):
                break
        else:
            raise BadRequest('Bad X-Object-Sharing header value')

        names = list(set([v.replace(' ', '').lower() for v in clause[len(marker):].split(',')]))
        if '' in names:
            names.remove('')
        if '*' in names:
            names = ['*']
        if len(names) == 0:
            raise BadRequest('Bad X-Object-Sharing header value')
        ret[access] = names

    # Keep duplicates only in write list.
    writers = ret.get('write', [])
    duplicates = [x for x in ret.get('read', []) if x in writers and x != '*']
    if duplicates:
        remaining = [x for x in ret['read'] if x not in duplicates]
        if remaining:
            ret['read'] = remaining
        else:
            del ret['read']

    return ret
491

    
492
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None when the header is absent, True for 'true' and False for
    'false' or an empty value (case-insensitive, spaces ignored).

    Raises BadRequest on error.
    """
    raw = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if raw is None:
        return None

    flag = raw.replace(' ', '').lower()
    if flag in ('false', ''):
        return False
    if flag == 'true':
        return True
    raise BadRequest('Bad X-Object-Public header value')
508

    
509
def raw_input_socket(request):
    """Return the socket for reading the rest of the request.

    Under mod_python (SERVER_SOFTWARE starts with 'mod_python') this is
    ``request._req``; otherwise it is the 'wsgi.input' entry of
    ``request.environ``. Raises the NotImplemented fault when neither is
    available.
    """
    software = request.META.get('SERVER_SOFTWARE')
    if software and software.startswith('mod_python'):
        return request._req

    environ = request.environ
    if 'wsgi.input' not in environ:
        raise NotImplemented('Unknown server software')
    return environ['wsgi.input']
518

    
519
# Upper bound, in bytes, on the data accepted by socket_read_iterator().
MAX_UPLOAD_SIZE = 5 * 1024 ** 3  # 5GB
520

    
521
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Chunked reads take one of two paths. If the server already de-chunks
    the input (mod_wsgi with 'mod_wsgi.input_chunked' set, or gunicorn),
    the stream is read until it returns no more data. Otherwise the chunked
    framing is decoded here: a hexadecimal size line (anything after ';' is
    ignored), the chunk data, then a CRLF, until a zero-sized chunk.

    Raises BadRequest when the size limit is exceeded, when a chunk size
    line is malformed or negative, or when the stream ends before the
    announced amount of data has been read.
    """
    sock = raw_input_socket(request)
    if length < 0:  # Chunked transfers
        # 'length' stays negative for chunked reads, so the running total
        # is tracked separately; otherwise the limit is never enforced.
        received = 0

        # Small version (server does the dechunking).
        server_software = request.META.get('SERVER_SOFTWARE') or ''
        if request.environ.get('mod_wsgi.input_chunked', None) or server_software.startswith('gunicorn'):
            while True:
                data = sock.read(blocksize)
                if not data:
                    return
                received += len(data)
                if received > MAX_UPLOAD_SIZE:
                    raise BadRequest('Maximum size is reached')
                yield data

        # Long version (do the dechunking).
        data = ''
        while True:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                size_line = sock.readline()
            else:
                size_line = ''
                while size_line[-1:] != '\n':
                    byte = sock.read(1)
                    if not byte:
                        # Stream ended inside the size line; without this
                        # check the loop would never terminate.
                        raise BadRequest('Bad chunk size')
                    size_line += byte
            pos = size_line.find(';')
            if pos >= 0:
                size_line = size_line[:pos]
            try:
                # int() tolerates the surrounding whitespace/CRLF.
                chunk_length = int(size_line, 16)
            except (TypeError, ValueError):
                raise BadRequest('Bad chunk size')  # TODO: Change to something more appropriate.
            if chunk_length < 0:
                raise BadRequest('Bad chunk size')
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                if not chunk:
                    # Stream ended before the announced chunk was complete.
                    raise BadRequest()
                chunk_length -= len(chunk)
                received += len(chunk)
                if received > MAX_UPLOAD_SIZE:
                    raise BadRequest('Maximum size is reached')
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2)  # CRLF
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
584

    
585
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Incoming data is buffered and stored in the backend one block at a
    time. The resulting block hashes are collected in ``file.hashmap`` and
    the MD5 of the whole content ends up in ``file.etag``.
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        # NOTE(review): despite the None default, a request carrying a
        # 'backend' attribute is required here.
        self.backend = request.backend

    def put_data(self, length):
        """Store every complete ``length``-sized block currently buffered.

        Each stored block is removed from the buffer, hashed into the
        running MD5 and its backend hash appended to ``file.hashmap``.
        Looping (rather than storing at most one block per call) keeps
        blocks correctly sized even when a single upload chunk is larger
        than the backend block size.
        """
        if length <= 0:
            return
        while len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the buffer, the digest and the UploadedFile for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer ``raw_data`` and flush all complete blocks to the backend.

        Always returns None. NOTE(review): under Django's upload-handler
        contract this presumably keeps the chunk from reaching later
        handlers - confirm against the Django version in use.
        """
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.backend.block_size)
        return None

    def file_complete(self, file_size):
        """Flush the remaining partial block, set the etag and return the file."""
        remaining = len(self.data)
        if remaining > 0:
            self.put_data(remaining)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
618

    
619
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.

    ``sizes`` and ``hashmaps`` are parallel lists, one entry per underlying
    file; range offsets address the concatenation of those files. With a
    single range only data is produced. With several ranges each one is
    preceded by a multipart header built from ``boundary`` and the stream
    ends with a closing boundary line.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        # Read position: current file, block within it, and the last block
        # fetched (kept so consecutive reads from one block fetch it once).
        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        # -1 means that not even the first part header has been emitted.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of the current range, at most one block.

        Raises StopIteration once the current range is exhausted.
        """
        if self.length <= 0:
            raise StopIteration

        # Get the file for the current offset; afterwards the offset is
        # relative to the start of that file.
        while self.offset >= self.sizes[self.file_index]:
            self.offset -= self.sizes[self.file_index]
            self.file_index += 1

        block_size = self.backend.block_size
        file_size = self.sizes[self.file_index]
        file_hashes = self.hashmaps[self.file_index]

        # Get the block for the current position.
        self.block_index, start = divmod(self.offset, block_size)
        wanted_hash = file_hashes[self.block_index]
        if self.block_hash != wanted_hash:
            self.block_hash = wanted_hash
            try:
                self.block = self.backend.get_block(self.block_hash)
            except NameError:
                raise ItemNotFound('Block does not exist')

        # Get the data from the block. The last block of a file only holds
        # the remainder when the file size is not a block multiple.
        usable = block_size
        if self.block_index == len(file_hashes) - 1 and file_size % block_size:
            usable = file_size % block_size
        count = min(self.length, usable - start)
        data = self.block[start:start + count]
        self.offset += count
        self.length -= count
        return data

    def next(self):
        """Return the next chunk of output: data, a part header or the footer."""
        total_ranges = len(self.ranges)
        if total_ranges == 1:
            return self.part_iterator()
        if self.range_index == total_ranges:
            raise StopIteration

        if self.range_index != -1:
            try:
                return self.part_iterator()
            except StopIteration:
                # Current range is done; fall through to the next header.
                pass

        self.range_index += 1
        if self.range_index >= total_ranges:
            # Footer.
            return '\r\n'.join(['', '--' + self.boundary + '--', ''])

        # Part header.
        self.offset, self.length = self.ranges[self.range_index]
        self.file_index = 0
        lines = []
        if self.range_index > 0:
            lines.append('')
        lines.extend([
            '--' + self.boundary,
            'Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size),
            'Content-Transfer-Encoding: binary',
            '',
            '',
        ])
        return '\r\n'.join(lines)
706

    
707
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data.

    Without a usable Range header the whole object is returned with status
    200. With one, every range must lie inside the object, otherwise
    RangeNotSatisfiable is raised; the reply is then a 206, unless an
    If-Range header (a date compared with ``meta['modified']``, or else a
    value compared with ``meta['checksum']``) does not match, in which case
    the whole object is sent with status 200.

    A 206 with one range gets Content-Length and Content-Range headers;
    with several ranges the body is multipart/byteranges. ``public`` is
    passed to put_object_headers() as its ``restricted`` argument.
    """
    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        for offset, length in ranges:
            if (length <= 0 or length > size or
                    offset < 0 or offset >= size or
                    offset + length > size):
                raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # Modification time has passed instead.
                outdated = parse_http_date(if_range) != meta['modified']
            except ValueError:
                outdated = if_range != meta['checksum']
            if outdated:
                ranges = [(0, size)]
                ret = 200

    multipart = ret == 206 and len(ranges) > 1
    boundary = uuid.uuid4().hex if multipart else ''

    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret != 206:
        return response

    if len(ranges) == 1:
        offset, length = ranges[0]
        response['Content-Length'] = length  # Update with the correct length.
        response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
    else:
        del response['Content-Length']
        response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response
753

    
754
def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset.

    At most the part of 'data' that fits in the block containing
    'offset' is stored. If that block already exists in 'hashmap' it is
    updated in place through the backend; otherwise a new block, zero
    padded up to the offset inside the block, is appended to 'hashmap'.

    Returns the number of items of 'data' that were written, which may
    be less than len(data).
    """

    block_size = request.backend.block_size
    # Floor division keeps the block index exact for any integer offset,
    # independent of the division semantics in effect.
    bi = int(offset // block_size)
    bo = offset % block_size
    bl = min(len(data), block_size - bo)
    if bi < len(hashmap):
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
    else:
        # NOTE(review): the new block is appended even if bi > len(hashmap);
        # presumably callers never write past the block following the end.
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
    return bl # Return amount of data written.
765

    
766
def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    Every block listed in 'hashmap' is fetched from the backend and fed
    to the digest in order. Only the last block is cut, down to the
    bytes that are still missing to reach 'size'.

    Returns the lowercase hexadecimal digest.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = backend.block_size
    last = len(hashmap) - 1
    for bi, block_hash in enumerate(hashmap):
        data = backend.get_block(block_hash) # Blocks come in padded.
        if bi == last:
            # Use the remaining byte count rather than 'size % bs', which
            # is 0 (dropping the whole block) when the size is an exact
            # multiple of the block size.
            data = data[:max(size - bi * bs, 0)]
        md5.update(data)
    return md5.hexdigest().lower()
778

    
779
def simple_list_response(request, l):
    """Serialize the list 'l' according to request.serialization.

    'text' gives one item per line with a trailing newline, 'xml'
    renders the 'items.xml' template and 'json' dumps the list as JSON.
    Any other serialization yields None.
    """

    serialization = request.serialization
    if serialization == 'json':
        return json.dumps(l)
    elif serialization == 'xml':
        return render_to_string('items.xml', {'items': l})
    elif serialization == 'text':
        return '\n'.join(l) + '\n'
    return None
786

    
787
def get_backend():
    """Connect to the backend and return it with the default policy set.

    The connection parameters come from the BACKEND_* settings; the
    'quota' and 'versioning' entries of the backend's default policy
    are overridden with BACKEND_QUOTA and BACKEND_VERSIONING.
    """

    connection_options = {
        'db_module': BACKEND_DB_MODULE,
        'db_connection': BACKEND_DB_CONNECTION,
        'block_module': BACKEND_BLOCK_MODULE,
        'block_path': BACKEND_BLOCK_PATH,
        'block_umask': BACKEND_BLOCK_UMASK,
        'queue_module': BACKEND_QUEUE_MODULE,
        'queue_connection': BACKEND_QUEUE_CONNECTION,
    }
    backend = connect_backend(**connection_options)

    policy = backend.default_policy
    policy['quota'] = BACKEND_QUOTA
    policy['versioning'] = BACKEND_VERSIONING
    return backend
798

    
799
def update_request_headers(request):
    """Check the HTTP_* entries of request.META and URL-decode them.

    Raises BadRequest if a header name or value cannot be decoded as
    ASCII. Every entry containing a '%' in its name or value is removed
    and stored again under the unquoted name, with the unquoted value
    passed through smart_unicode().
    """

    # Handle URL-encoded keys and values.
    # Work on a copy, since request.META is modified inside the loop.
    http_headers = dict((name, value)
                        for name, value in request.META.iteritems()
                        if name.startswith('HTTP_'))
    for name, value in http_headers.iteritems():
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' not in name and '%' not in value:
            continue
        del request.META[name]
        request.META[unquote(name)] = smart_unicode(unquote(value), strings_only=True)
811

    
812
def update_response_headers(request, response):
    """Finalize the headers of 'response' before it is sent.

    Sets 'Content-Type' from request.serialization ('xml' or 'json'),
    falling back to plain text when the response has no content type.
    Adds 'Content-Length' from the response content when it is missing,
    except for 'multipart/byteranges' replies. Finally URL-encodes the
    names and values of all 'X-Account-', 'X-Container-', 'X-Object-'
    and 'Content-' headers.
    """

    serialization = request.serialization
    if serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if not response.has_header('Content-Length'):
        multipart = (response.has_header('Content-Type') and
                     response['Content-Type'].startswith('multipart/byteranges'))
        if not multipart:
            response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    quoted_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    for name, value in response.items():
        if not name.startswith(quoted_prefixes):
            continue
        del response[name]
        response[quote(name)] = quote(value, safe='/=,:@; ')
832

    
833
def render_fault(request, fault):
    """Build the plain text HttpResponse that reports 'fault'.

    The body is the fault's message followed, when present, by its
    details; the status is the fault's code. For an InternalServerError
    with settings.DEBUG enabled, the details are replaced with the
    formatted traceback of the exception currently being handled.

    Forces request.serialization to 'text' as a side effect.
    """

    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # format_exc() formats the exception being handled; its only
        # argument is a depth limit, so the fault must not be passed in.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
844

    
845
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Valid formats are 'text' and, if 'format_allowed' is True, 'json'
    and 'xml'. An explicit 'format' query parameter is checked first,
    then the media types of the 'Accept' header in the order they are
    listed (their parameters are ignored). Defaults to 'text'.
    """

    if not format_allowed:
        return 'text'

    requested = request.GET.get('format')
    if requested == 'json':
        return 'json'
    if requested == 'xml':
        return 'xml'

    by_media_type = {
        'application/json': 'json',
        'application/xml': 'xml',
        'text/xml': 'xml',
    }
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        media_type = item.strip().partition(';')[0]
        if media_type in by_media_type:
            return by_media_type[media_type]

    return 'text'
868

    
869
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method.

    The wrapped view is only called after the request passes these
    checks, each of which is reported as a fault response:

      * the HTTP method matches 'http_method', when one is given;
      * request.user is set, when 'user_required' is True;
      * the container name (second positional argument) is at most 256
        characters and the object name (third) at most 1024;
      * the request headers are accepted by update_request_headers().

    request.serialization and request.backend are filled in before the
    view runs, and the response headers are finalized afterwards. A
    Fault is rendered as is; any other exception is logged and rendered
    as an InternalServerError. The backend, if one was opened, is always
    closed.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                if user_required and getattr(request, 'user', None) is None:
                    raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except BaseException as e:
                # NOTE(review): this also traps SystemExit and
                # KeyboardInterrupt - confirm that this is intended.
                # The arguments are passed lazily so that a formatting
                # problem cannot raise from inside this handler.
                logger.exception('Unexpected error: %s', e)
                fault = InternalServerError('Unexpected error')
                return render_fault(request, fault)
            finally:
                # The backend may be missing if a check failed early.
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator