Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 7efc9f86

History | View | Annotate | Download (35.8 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52
from synnefo.lib.astakos import get_user
53

    
54
from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
55
                                Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
56
                                RangeNotSatisfiable, InternalServerError, NotImplemented)
57
from pithos.api.short_url import encode_url
58
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
59
                                    BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
60
                                    BACKEND_BLOCK_UMASK,
61
                                    BACKEND_QUEUE_MODULE, BACKEND_QUEUE_CONNECTION,
62
                                    BACKEND_QUOTA, BACKEND_VERSIONING,
63
                                    AUTHENTICATION_URL, AUTHENTICATION_USERS,
64
                                    SERVICE_TOKEN, COOKIE_NAME)
65

    
66
from pithos.backends import connect_backend
67
from pithos.backends.base import NotAllowedError, QuotaError, ItemNotExists, VersionNotExists
68

    
69
import logging
70
import re
71
import hashlib
72
import uuid
73
import decimal
74

    
75

    
76
logger = logging.getLogger(__name__)  # Module-level logger named after this module's import path.
77

    
78

    
79
class UTC(tzinfo):
    """Concrete tzinfo implementation for Coordinated Universal Time.

    Zero offset from UTC and no daylight saving time.
    """

    def utcoffset(self, dt):
        """Offset from UTC is always zero."""
        return timedelta(0)

    def tzname(self, dt):
        """Canonical name of this timezone."""
        return 'UTC'

    def dst(self, dt):
        """Daylight saving time is never in effect."""
        return timedelta(0)
88

    
89
def json_encode_decimal(obj):
    """JSON 'default' hook that serializes Decimal values as strings.

    Raises TypeError for any other object, as the json module expects.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
93

    
94
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone."""
    aware = d.replace(tzinfo=UTC())
    return aware.isoformat()
98

    
99
def rename_meta_key(d, old, new):
    """Move the value stored under key 'old' to key 'new', in place.

    Does nothing when 'old' is missing or when both names are equal.
    (The previous implementation deleted the key entirely when
    old == new, because the delete ran after the self-assignment.)
    """
    if old not in d or old == new:
        return
    d[new] = d.pop(old)
104

    
105
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.
    """
    if d.get('last_modified'):
        d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
    return dict((k.lower().replace('-', '_'), v) for k, v in d.items())
115

    
116
def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    parts = k.replace('_', '-').split('-')
    return '-'.join(part.capitalize() for part in parts)
119

    
120
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""
    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    # TODO: Document or remove '~' replacing.
    headers = {}
    for k, v in request.META.items():
        if k.startswith(meta_prefix) and len(k) > len(meta_prefix):
            headers[format_header_key(k[5:])] = v.replace('~', '')
    return headers
126

    
127
def check_meta_headers(meta):
    """Reject meta header sets exceeding the allowed count or name/value sizes.

    Raises BadRequest when there are more than 90 headers, a name longer
    than 128 characters, or a value longer than 256 characters.
    """
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for name, value in meta.items():
        if len(name) > 128:
            raise BadRequest('Header name too large.')
        if len(value) > 256:
            raise BadRequest('Header value too large.')
135

    
136
def get_account_headers(request):
    """Extract account meta headers and group definitions from the request.

    Returns a (meta, groups) pair. Raises BadRequest on oversized meta
    or on a group name containing '-' or '_'.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)
    groups = {}
    for k, v in get_header_prefix(request, 'X-Account-Group-').items():
        name = k[16:].lower()
        if '-' in name or '_' in name:
            raise BadRequest('Bad characters in group name')
        members = v.replace(' ', '').split(',')
        groups[name] = [m for m in members if m != '']
    return meta, groups
148

    
149
def put_account_headers(response, meta, groups, policy):
    """Fill in account-level response headers from meta, groups and policy."""
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in meta.keys():
        if k.startswith('X-Account-Meta-'):
            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for name, members in groups.items():
        header = format_header_key('X-Account-Group-' + smart_str(name, strings_only=True))
        response[header] = smart_str(','.join(members), strings_only=True)
    for k, v in policy.items():
        response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
166

    
167
def get_container_headers(request):
    """Extract container meta headers and policy settings from the request.

    Returns a (meta, policy) pair; policy keys are lower-cased with the
    'X-Container-Policy-' prefix stripped.
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)
    policy = {}
    for k, v in get_header_prefix(request, 'X-Container-Policy-').items():
        policy[k[19:].lower()] = v.replace(' ', '')
    return meta, policy
172

    
173
def put_container_headers(request, response, meta, policy):
    """Fill in container-level response headers from meta and policy."""
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in meta.keys():
        if k.startswith('X-Container-Meta-'):
            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    object_meta = [smart_str(x, strings_only=True)
                   for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(x[14:] for x in object_meta)
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in policy.items():
        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
189

    
190
def get_object_headers(request):
    """Extract object headers from the request.

    Returns (content_type, meta, sharing, public).
    """
    content_type = request.META.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)
    # Fold selected standard headers into the meta dict.
    for header, key in (('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
                        ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
                        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest')):
        if request.META.get(header):
            meta[key] = request.META[header]
    return content_type, meta, get_sharing(request), get_public(request)
201

    
202
def put_object_headers(response, meta, restricted=False):
    """Fill in object-level response headers from meta.

    When 'restricted' is true (e.g. public access) only the basic
    content headers are exposed.
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if restricted:
        for k in ('Content-Encoding', 'Content-Disposition'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)
        return
    response['X-Object-Hash'] = meta['hash']
    response['X-Object-UUID'] = meta['uuid']
    response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
    response['X-Object-Version'] = meta['version']
    response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
    for k in meta.keys():
        if k.startswith('X-Object-Meta-'):
            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
              'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
              'X-Object-Public'):
        if k in meta:
            response[k] = smart_str(meta[k], strings_only=True)
224

    
225
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    For a manifest object, recompute 'bytes' as the sum of the part
    sizes and 'checksum' as the MD5 of the concatenated part checksums.
    Any backend error leaves meta untouched (best effort).
    """
    if 'X-Object-Manifest' in meta:
        etag = ''
        bytes = 0
        try:
            src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
            # All objects in src_container whose name starts with src_name
            # are the parts of this manifest.
            objects = request.backend.list_objects(request.user_uniq, v_account,
                                src_container, prefix=src_name, virtual=False)
            for x in objects:
                # x[0] is the object name; x[1] presumably its version id —
                # confirm against the backend's list_objects contract.
                src_meta = request.backend.get_object_meta(request.user_uniq,
                                        v_account, src_container, x[0], 'pithos', x[1])
                etag += src_meta['checksum']
                bytes += src_meta['bytes']
        except:
            # Deliberate best-effort: ignore errors and keep the existing meta.
            return
        meta['bytes'] = bytes
        md5 = hashlib.md5()
        md5.update(etag)
        meta['checksum'] = md5.hexdigest().lower()
247

    
248
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add X-Object-Sharing related entries to the meta dict.

    'permissions' is an (allowed, perm_path, perms) tuple, or None when
    the object carries no permissions.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if not perms:
        return
    parts = []
    readers = ','.join(perms.get('read', []))
    if readers:
        parts.append('read=' + readers)
    writers = ','.join(perms.get('write', []))
    if writers:
        parts.append('write=' + writers)
    meta['X-Object-Sharing'] = '; '.join(parts)
    if perm_path != '/'.join((v_account, v_container, v_object)):
        # Permissions are inherited from a parent path.
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
266

    
267
def update_public_meta(public, meta):
    """Add the X-Object-Public URL to meta when the object is public."""
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
271

    
272
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.

    Raises NotModified or PreconditionFailed accordingly. Unparsable
    header values are silently ignored.
    """
    if 'modified' not in meta:
        return # TODO: Always return?
    modified = int(meta['modified'])

    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if_modified_since = parse_http_date_safe(if_modified_since)
        if if_modified_since is not None and modified <= if_modified_since:
            raise NotModified('Resource has not been modified')

    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
        if if_unmodified_since is not None and modified > if_unmodified_since:
            raise PreconditionFailed('Resource has been modified')
289

    
290
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    Raises PreconditionFailed or NotModified accordingly. An empty
    checksum is treated as "no ETag".
    """
    etag = meta['checksum'] or None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            if etag not in (x.lower() for x in parse_etags(if_match)):
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None and etag is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        if if_none_match == '*' or etag in (x.lower() for x in parse_etags(if_none_match)):
            # TODO: Continue if an If-Modified-Since header is present.
            if request.method in ('HEAD', 'GET'):
                raise NotModified('Resource ETag matches')
            raise PreconditionFailed('Resource exists or ETag matches')
313

    
314
def split_container_object_string(s):
    """Split a '/container/object' string into (container, object).

    Raises ValueError when the string lacks a leading slash, a second
    slash, or a non-empty object part.
    """
    if not s.startswith('/'):
        raise ValueError
    rest = s[1:]
    pos = rest.find('/')
    if pos == -1 or pos == len(rest) - 1:
        raise ValueError
    return rest[:pos], rest[pos + 1:]
322

    
323
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False, delimiter=None):
    """Copy or move an object.

    Request headers (meta, sharing, public) are applied to the
    destination. Returns the new version id. Backend errors are mapped
    to Forbidden, ItemNotFound, BadRequest or RequestEntityTooLarge.
    """
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        # Caller asked to keep the destination's own content type.
        del(request.META['CONTENT_TYPE'])
    content_type, meta, permissions, public = get_object_headers(request)
    if delimiter:
            public = False # Ignore the public flag for recursive (delimiter) operations.
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        content_type, 'pithos', meta, False, permissions, delimiter)
        else:
            version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        content_type, 'pithos', meta, False, permissions, src_version, delimiter)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (ItemNotExists, VersionNotExists):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')
    if public is not None:
        # Apply the public flag to the destination in a separate call.
        try:
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
        except NotAllowedError:
            raise Forbidden('Not allowed')
        except ItemNotExists:
            raise ItemNotFound('Object does not exist')
    return version_id
357

    
358
def get_int_parameter(p):
    """Coerce 'p' to a non-negative int.

    Returns None for invalid or negative values; a None input passes
    through unchanged.
    """
    if p is None:
        return None
    try:
        value = int(p)
    except ValueError:
        return None
    return value if value >= 0 else None
367

    
368
def get_content_length(request):
    """Return the request's Content-Length as a non-negative int.

    Raises LengthRequired when the header is missing or invalid.
    """
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if content_length is None:
        raise LengthRequired('Missing or invalid Content-Length header')
    return content_length
373

    
374
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.
    """
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compile once (the previous version recompiled this pattern on
    # every range in the header) and use a raw string for the regex.
    p = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        m = p.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                ret.append((offset, upto - offset + 1))
            else:
                # Open-ended range: from offset to the end.
                ret.append((offset, size - offset))
        else:
            # Suffix range: the last 'upto' bytes.
            length = int(upto)
            ret.append((size - length, length))

    return ret
410

    
411
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """
    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None

    m = re.match(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if m is None:
        return (None, None, None) if header == 'bytes */*' else None
    offset = int(m.group('offset'))
    upto = m.group('upto')
    total = m.group('total')
    upto = int(upto) if upto != '' else None
    total = int(total) if total != '*' else None
    # Reject inconsistent combinations of offset/upto/total.
    if (upto is not None and offset > upto) or \
       (total is not None and offset >= total) or \
       (total is not None and upto is not None and upto >= total):
        return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
450

    
451
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Raises BadRequest on error.
    """
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '').replace(' ', '')

    ret = {}
    if permissions == '':
        return ret
    for perm in permissions.split(';'):
        # The read= and write= clauses are parsed identically.
        if perm.startswith('read='):
            key, value = 'read', perm[5:]
        elif perm.startswith('write='):
            key, value = 'write', perm[6:]
        else:
            raise BadRequest('Bad X-Object-Sharing header value')
        users = set(v.replace(' ', '').lower() for v in value.split(','))
        users.discard('')
        if '*' in users:
            users = set(['*'])
        if not users:
            raise BadRequest('Bad X-Object-Sharing header value')
        ret[key] = list(users)

    # Keep duplicates only in write list.
    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
    for x in dups:
        ret['read'].remove(x)
    if 'read' in ret and not ret['read']:
        del ret['read']

    return ret
497

    
498
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Raises BadRequest on error.
    """
    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    value = public.replace(' ', '').lower()
    if value == 'true':
        return True
    if value in ('false', ''):
        return False
    raise BadRequest('Bad X-Object-Public header value')
514

    
515
def raw_input_socket(request):
    """Return the socket for reading the rest of the request."""
    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        # mod_python exposes its own raw request object.
        return request._req
    if 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise NotImplemented('Unknown server software')
524

    
525
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB hard cap, enforced by socket_read_iterator.
526

    
527
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
    """
    sock = raw_input_socket(request)
    if length < 0: # Chunked transfers
        # Small version (server does the dechunking).
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
            # Track the bytes read explicitly; the previous code never
            # advanced its counter (it stayed at the negative sentinel),
            # so MAX_UPLOAD_SIZE was never actually enforced here.
            total = 0
            while total < MAX_UPLOAD_SIZE:
                data = sock.read(blocksize)
                if data == '':
                    return
                total += len(data)
                yield data
            raise BadRequest('Maximum size is reached')

        # Long version (do the dechunking).
        total = 0
        data = ''
        while total < MAX_UPLOAD_SIZE:
            # Get the chunk-size line.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    chunk_length += sock.read(1)
                # Assign the result (the old code discarded strip()'s
                # return value; int(x, 16) tolerates whitespace anyway).
                chunk_length = chunk_length.strip()
            pos = chunk_length.find(';')
            if pos >= 0:
                # Drop any chunk extensions after ';'.
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length, 16)
            except Exception:
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            # A zero-length chunk marks the end of the body.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                chunk_length -= len(chunk)
                total += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        raise BadRequest('Maximum size is reached')
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
590

    
591
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Buffers incoming data and stores it in the backend one block at a
    time, accumulating the block hashes and an MD5 digest as it goes.
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store one block of 'length' bytes in the backend if enough
        data has been buffered; record its hash and update the MD5."""
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset state for the next uploaded file in the form."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer incoming data; flush a full backend block when possible.
        Returning None tells Django the chunk was consumed."""
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.request.backend.block_size)
        return None

    def file_complete(self, file_size):
        """Flush any remaining partial block and finalize the ETag."""
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
624

    
625
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.
    For multiple ranges, multipart/byteranges part headers and a footer
    are interleaved with the data.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges      # list of (offset, length) tuples
        self.sizes = sizes        # size of each underlying file
        self.hashmaps = hashmaps  # per-file lists of block hashes
        self.boundary = boundary  # multipart boundary ('' when single range)
        self.size = sum(self.sizes)

        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1   # hash of the currently cached block
        self.block = ''        # cached block data

        self.range_index = -1  # -1 until the first multipart header is emitted
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of data for the current range, or raise
        StopIteration when the range is exhausted."""
        if self.length > 0:
            # Get the file for the current offset.
            file_size = self.sizes[self.file_index]
            while self.offset >= file_size:
                self.offset -= file_size
                self.file_index += 1
                file_size = self.sizes[self.file_index]

            # Get the block for the current position (cached across calls).
            self.block_index = int(self.offset / self.backend.block_size)
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
                try:
                    self.block = self.backend.get_block(self.block_hash)
                except ItemNotExists:
                    raise ItemNotFound('Block does not exist')

            # Get the data from the block.
            bo = self.offset % self.backend.block_size  # offset within the block
            bs = self.backend.block_size
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
                self.sizes[self.file_index] % self.backend.block_size):
                # The final block of a file may be shorter than block_size.
                bs = self.sizes[self.file_index] % self.backend.block_size
            bl = min(self.length, bs - bo)
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration

    def next(self):
        """Iterator protocol (Python 2). Single range: stream data only.
        Multiple ranges: emit a part header before each range's data and
        a closing footer after the last range."""
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                # Force the first part header to be emitted below.
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            # Current range exhausted: advance and emit a header or footer.
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)
712

    
713
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data."""

    # Work out which byte ranges to serve and the matching status code.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        status = 200
    else:
        invalid = any(length <= 0 or length > size or
                      offset < 0 or offset >= size or
                      offset + length > size
                      for offset, length in ranges)
        if invalid:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        status = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # If-Range carries a date: serve the full object if the
                # modification time no longer matches.
                last_modified = parse_http_date(if_range)
                if last_modified != meta['modified']:
                    ranges = [(0, size)]
                    status = 200
            except ValueError:
                # Not a date: treat the value as an entity tag and
                # compare against the stored checksum.
                if if_range != meta['checksum']:
                    ranges = [(0, size)]
                    status = 200

    # Only a multi-range partial reply needs a MIME boundary.
    boundary = uuid.uuid4().hex if (status == 206 and len(ranges) > 1) else ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=status)
    put_object_headers(response, meta, public)
    if status == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            # Length is unknown up front for multipart replies.
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response
759

    
760
def put_object_block(request, hashmap, data, offset):
    """Write one block's worth of data into the hashmap at the given offset.

    Returns the number of bytes actually written (never more than what
    fits in the addressed block).
    """

    block_size = request.backend.block_size
    index = int(offset / block_size)
    in_block_offset = offset % block_size
    written = min(len(data), block_size - in_block_offset)
    if index < len(hashmap):
        # Existing block: patch it in place and record the new hash.
        hashmap[index] = request.backend.update_block(hashmap[index],
                                                      data[:written],
                                                      in_block_offset)
    else:
        # New block: zero-fill up to the in-block offset before the data.
        hashmap.append(request.backend.put_block(('\x00' * in_block_offset) + data[:written]))
    return written # Return amount of data written.
771

    
772
def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    Fetches every block referenced by the hashmap, trims the padding off
    the final partial block, and returns the lowercase hex digest of the
    concatenated data.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = backend.block_size
    for bi, block_hash in enumerate(hashmap):
        data = backend.get_block(block_hash) # Blocks come in padded.
        if bi == len(hashmap) - 1 and size % bs:
            # Trim padding on the last, partial block only. When the
            # object size is an exact multiple of the block size the
            # last block is full; the previous code truncated it to the
            # empty string (data[:0]) and produced a wrong digest.
            data = data[:size % bs]
        md5.update(data)
    return md5.hexdigest().lower()
784

    
785
def simple_list_response(request, l):
    """Serialize a flat list of strings per the request's format.

    Returns None for an unrecognized serialization, as before.
    """
    serialization = request.serialization
    if serialization == 'json':
        return json.dumps(l)
    elif serialization == 'xml':
        return render_to_string('items.xml', {'items': l})
    elif serialization == 'text':
        return '\n'.join(l) + '\n'
792

    
793
def get_backend():
    """Connect a backend configured from the module-level settings.

    Also applies the default quota and versioning policy from settings.
    """
    options = dict(db_module=BACKEND_DB_MODULE,
                   db_connection=BACKEND_DB_CONNECTION,
                   block_module=BACKEND_BLOCK_MODULE,
                   block_path=BACKEND_BLOCK_PATH,
                   block_umask=BACKEND_BLOCK_UMASK,
                   queue_module=BACKEND_QUEUE_MODULE,
                   queue_connection=BACKEND_QUEUE_CONNECTION)
    backend = connect_backend(**options)
    backend.default_policy.update(quota=BACKEND_QUOTA,
                                  versioning=BACKEND_VERSIONING)
    return backend
804

    
805
def update_request_headers(request):
    """Reject non-ASCII HTTP headers and unquote URL-encoded ones.

    Raises BadRequest when a header name or value is not pure ASCII.
    """
    # Snapshot the HTTP_* headers so request.META can be mutated below.
    headers = [(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')]
    for name, value in headers:
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' in name or '%' in value:
            # Replace the encoded header with its decoded counterpart.
            del(request.META[name])
            request.META[unquote(name)] = smart_unicode(unquote(value), strings_only=True)
817

    
818
def update_response_headers(request, response):
    """Finalize Content-Type/Content-Length and URL-encode header values."""
    serialization = request.serialization
    if serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    # Multipart bodies are streamed, so their length is not known here.
    is_multipart = (response.has_header('Content-Type') and
                    response['Content-Type'].startswith('multipart/byteranges'))
    if not response.has_header('Content-Length') and not is_multipart:
        response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    encoded_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    headers = response.items()
    for name, value in headers:
        if name.startswith(encoded_prefixes):
            del(response[name])
            response[quote(name)] = quote(value, safe='/=,:@; ')
838

    
839
def render_fault(request, fault):
    """Render a Fault as a plain-text HTTP error response.

    In DEBUG mode, internal server errors get the current traceback
    appended to their details.
    """
    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # BUG FIX: format_exc(fault) passed the exception object as
        # format_exc's optional traceback-depth `limit` argument.
        # The current exception is picked up implicitly.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
850

    
851
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.
    
    Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
    """

    if not format_allowed:
        return 'text'

    # An explicit ?format= query parameter wins over the Accept header.
    explicit = request.GET.get('format')
    if explicit in ('json', 'xml'):
        return explicit

    # Fall back to content negotiation via the Accept header.
    for accept_item in request.META.get('HTTP_ACCEPT', '').split(','):
        media_type = accept_item.strip().partition(';')[0]
        if media_type == 'application/json':
            return 'json'
        if media_type in ('application/xml', 'text/xml'):
            return 'xml'

    return 'text'
874

    
875
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method.

    Wraps the view with: optional HTTP method enforcement, optional user
    authentication (with a cookie-token fallback for browser GET/HEAD
    requests), name-length validation, header normalization, per-request
    serialization/backend setup, and uniform Fault-to-response error
    rendering.  The backend is always closed on the way out.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                if user_required:
                    token = None
                    # For browser requests, the auth token may arrive in
                    # a cookie instead of a header; the cookie value
                    # appears to be '<something>|<token>' — the token is
                    # whatever follows the first '|'.
                    if request.method in ('HEAD', 'GET') and COOKIE_NAME in request.COOKIES:
                        cookie_value = unquote(request.COOKIES.get(COOKIE_NAME, ''))
                        if cookie_value and '|' in cookie_value:
                            token = cookie_value.split('|', 1)[1]
                    # get_user populates request.user (or leaves it
                    # unset on failure).
                    get_user(request, AUTHENTICATION_URL, AUTHENTICATION_USERS, token)
                    if  getattr(request, 'user', None) is None:
                        raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault, fault:
                # Expected API faults render as their own status codes.
                return render_fault(request, fault)
            except BaseException, e:
                # Anything else is logged and reported as a 500.
                logger.exception('Unexpected error: %s' % e)
                fault = InternalServerError('Unexpected error: %s' % e)
                return render_fault(request, fault)
            finally:
                # Release the backend connection even on error paths.
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator