
root / snf-pithos-app / pithos / api / util.py @ f77b1da9


# Copyright 2011-2012 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
#   1. Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer.
#
#   2. Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.

from functools import wraps
from time import time
from traceback import format_exc
from wsgiref.handlers import format_date_time
from binascii import hexlify, unhexlify
from datetime import datetime, tzinfo, timedelta
from urllib import quote, unquote

from django.conf import settings
from django.http import HttpResponse
from django.template.loader import render_to_string
from django.utils import simplejson as json
from django.utils.http import http_date, parse_etags
from django.utils.encoding import smart_unicode, smart_str
from django.core.files.uploadhandler import FileUploadHandler
from django.core.files.uploadedfile import UploadedFile

from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
from synnefo.lib.astakos import get_user

from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
                                Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
                                RangeNotSatisfiable, InternalServerError, NotImplemented)
from pithos.api.short_url import encode_url
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
                                 BACKEND_BLOCK_UMASK,
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_CONNECTION,
                                 BACKEND_QUOTA, BACKEND_VERSIONING,
                                 AUTHENTICATION_URL, AUTHENTICATION_USERS,
                                 SERVICE_TOKEN, COOKIE_NAME)

from pithos.backends import connect_backend
from pithos.backends.base import NotAllowedError, QuotaError, ItemNotExists, VersionNotExists

import logging
import re
import hashlib
import uuid
import decimal


logger = logging.getLogger(__name__)


class UTC(tzinfo):
    def utcoffset(self, dt):
        return timedelta(0)

    def tzname(self, dt):
        return 'UTC'

    def dst(self, dt):
        return timedelta(0)

def json_encode_decimal(obj):
    if isinstance(obj, decimal.Decimal):
        return str(obj)
    raise TypeError(repr(obj) + " is not JSON serializable")
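# Illustrative note (not part of the original module): json_encode_decimal is
# intended as the 'default' hook of json.dumps, so Decimal values coming from
# the backend serialize as strings, e.g.
#   json.dumps({'quota': decimal.Decimal('1073741824')}, default=json_encode_decimal)
#   # -> '{"quota": "1073741824"}'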

    
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone."""

    return d.replace(tzinfo=UTC()).isoformat()

def rename_meta_key(d, old, new):
    if old not in d:
        return
    d[new] = d[old]
    del(d[old])

def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.
    """

    if 'last_modified' in d and d['last_modified']:
        d['last_modified'] = isoformat(datetime.fromtimestamp(d['last_modified']))
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
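# Illustrative note (not part of the original module): printable_header_dict
# lowercases keys, turns dashes into underscores and rewrites 'last_modified'
# as an ISO 8601 string (the exact timestamp rendering depends on the server's
# local time zone), e.g.
#   printable_header_dict({'X-Object-Meta-Color': 'blue', 'last_modified': 1333309000})
#   # -> {'x_object_meta_color': 'blue', 'last_modified': '2012-04-01T...'}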

    
def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])

def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""

    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    # TODO: Document or remove '~' replacing.
    return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])
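# Illustrative note (not part of the original module): format_header_key
# restores canonical HTTP header casing, e.g.
#   format_header_key('x_account_group_dev')  # -> 'X-Account-Group-Dev'
# and get_header_prefix(request, 'X-Account-Meta-') collects every
# request.META entry starting with 'HTTP_X_ACCOUNT_META_' under such
# reformatted keys.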

    
def check_meta_headers(meta):
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for k, v in meta.iteritems():
        if len(k) > 128:
            raise BadRequest('Header name too large.')
        if len(v) > 256:
            raise BadRequest('Header value too large.')

def get_account_headers(request):
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)
    groups = {}
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
        n = k[16:].lower()
        if '-' in n or '_' in n:
            raise BadRequest('Bad characters in group name')
        groups[n] = v.replace(' ', '').split(',')
        while '' in groups[n]:
            groups[n].remove('')
    return meta, groups
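# Illustrative note (not part of the original module): with a request carrying
#   X-Account-Meta-Color: blue
#   X-Account-Group-Dev: alice@example.com,bob@example.com
# get_account_headers(request) would return
#   ({'X-Account-Meta-Color': 'blue'}, {'dev': ['alice@example.com', 'bob@example.com']})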

    
def put_account_headers(response, meta, groups, policy):
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in groups.iteritems():
        k = smart_str(k, strings_only=True)
        k = format_header_key('X-Account-Group-' + k)
        v = smart_str(','.join(v), strings_only=True)
        response[k] = v
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

def get_container_headers(request):
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
    return meta, policy

def put_container_headers(request, response, meta, policy):
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
        response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)

def get_object_headers(request):
    content_type = request.META.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)
    if request.META.get('HTTP_CONTENT_ENCODING'):
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
    return content_type, meta, get_sharing(request), get_public(request)

def put_object_headers(response, meta, restricted=False):
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if not restricted:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
            response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
        for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
                  'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                  'X-Object-Public'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)
    else:
        for k in ('Content-Encoding', 'Content-Disposition'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)

def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest."""

    if 'X-Object-Manifest' in meta:
        etag = ''
        bytes = 0
        try:
            src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
            objects = request.backend.list_objects(request.user_uniq, v_account,
                                src_container, prefix=src_name, virtual=False)
            for x in objects:
                src_meta = request.backend.get_object_meta(request.user_uniq,
                                        v_account, src_container, x[0], 'pithos', x[1])
                etag += src_meta['checksum']
                bytes += src_meta['bytes']
        except:
            # Ignore errors.
            return
        meta['bytes'] = bytes
        md5 = hashlib.md5()
        md5.update(etag)
        meta['checksum'] = md5.hexdigest().lower()

def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return
    ret = []
    r = ','.join(perms.get('read', []))
    if r:
        ret.append('read=' + r)
    w = ','.join(perms.get('write', []))
    if w:
        ret.append('write=' + w)
    meta['X-Object-Sharing'] = '; '.join(ret)
    if '/'.join((v_account, v_container, v_object)) != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed

def update_public_meta(public, meta):
    if not public:
        return
    meta['X-Object-Public'] = '/public/' + encode_url(public)

def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set."""

    if 'modified' not in meta:
        return # TODO: Always return?

    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if_modified_since = parse_http_date_safe(if_modified_since)
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
        raise NotModified('Resource has not been modified')

    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
        raise PreconditionFailed('Resource has been modified')

def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set."""

    etag = meta['checksum']
    if not etag:
        etag = None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
            raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        if etag is not None:
            if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
                # TODO: Continue if an If-Modified-Since header is present.
                if request.method in ('HEAD', 'GET'):
                    raise NotModified('Resource ETag matches')
                raise PreconditionFailed('Resource exists or ETag matches')

def split_container_object_string(s):
    if not len(s) > 0 or s[0] != '/':
        raise ValueError
    s = s[1:]
    pos = s.find('/')
    if pos == -1 or pos == len(s) - 1:
        raise ValueError
    return s[:pos], s[(pos + 1):]
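# Illustrative note (not part of the original module):
#   split_container_object_string('/photos/summer/1.jpg')  # -> ('photos', 'summer/1.jpg')
# A missing leading slash, a missing object part ('/photos') or a trailing
# slash with no object name ('/photos/') raises ValueError.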

    
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False, delimiter=None):
    """Copy or move an object."""

    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del(request.META['CONTENT_TYPE'])
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            version_id = request.backend.move_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        content_type, 'pithos', meta, False, permissions, delimiter)
        else:
            version_id = request.backend.copy_object(request.user_uniq, src_account, src_container, src_name,
                                                        dest_account, dest_container, dest_name,
                                                        content_type, 'pithos', meta, False, permissions, src_version, delimiter)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (ItemNotExists, VersionNotExists):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')
    if public is not None:
        try:
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
        except NotAllowedError:
            raise Forbidden('Not allowed')
        except ItemNotExists:
            raise ItemNotFound('Object does not exist')
    return version_id

def get_int_parameter(p):
    if p is not None:
        try:
            p = int(p)
        except ValueError:
            return None
        if p < 0:
            return None
    return p

def get_content_length(request):
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if content_length is None:
        raise LengthRequired('Missing or invalid Content-Length header')
    return content_length

def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is absent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
        m = p.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                ret.append((offset, upto - offset + 1))
            else:
                ret.append((offset, size - offset))
        else:
            length = int(upto)
            ret.append((size - length, length))

    return ret
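# Illustrative note (not part of the original module): for a 1000-byte object,
#   'Range: bytes=0-499,-100'  ->  get_range(request, 1000) == [(0, 500), (900, 100)]
# A missing or malformed header yields None, which callers treat as "serve the
# whole object".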

    
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header is absent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None

    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
    m = p.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None
    offset = int(m.group('offset'))
    upto = m.group('upto')
    total = m.group('total')
    if upto != '':
        upto = int(upto)
    else:
        upto = None
    if total != '*':
        total = int(total)
    else:
        total = None
    if (upto is not None and offset > upto) or \
        (total is not None and offset >= total) or \
        (total is not None and upto is not None and upto >= total):
        return None

    if upto is None:
        length = None
    else:
        length = upto - offset + 1
    return (offset, length, total)
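# Illustrative note (not part of the original module):
#   'Content-Range: bytes 0-499/1000'  ->  (0, 500, 1000)
#   'Content-Range: bytes 500-/*'      ->  (500, None, None)
#   'Content-Range: bytes */*'         ->  (None, None, None)
# Inconsistent ranges (e.g. an offset past the declared total) yield None.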

    
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '')

    ret = {}
    permissions = permissions.replace(' ', '')
    if permissions == '':
        return ret
    for perm in (x for x in permissions.split(';')):
        if perm.startswith('read='):
            ret['read'] = list(set([v.replace(' ', '').lower() for v in perm[5:].split(',')]))
            if '' in ret['read']:
                ret['read'].remove('')
            if '*' in ret['read']:
                ret['read'] = ['*']
            if len(ret['read']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        elif perm.startswith('write='):
            ret['write'] = list(set([v.replace(' ', '').lower() for v in perm[6:].split(',')]))
            if '' in ret['write']:
                ret['write'].remove('')
            if '*' in ret['write']:
                ret['write'] = ['*']
            if len(ret['write']) == 0:
                raise BadRequest('Bad X-Object-Sharing header value')
        else:
            raise BadRequest('Bad X-Object-Sharing header value')

    # Keep duplicates only in write list.
    dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret
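# Illustrative note (not part of the original module):
#   'X-Object-Sharing: read=alice,bob; write=bob'
# parses to {'read': ['alice'], 'write': ['bob']} - a user listed in both
# lists is kept only in the write list. 'read=*' marks the object readable
# by everyone: {'read': ['*']}.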

    
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    public = public.replace(' ', '').lower()
    if public == 'true':
        return True
    elif public == 'false' or public == '':
        return False
    raise BadRequest('Bad X-Object-Public header value')

def raw_input_socket(request):
    """Return the socket for reading the rest of the request."""

    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    if 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise NotImplemented('Unknown server software')

MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB

def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.
    """

    sock = raw_input_socket(request)
    if length < 0: # Chunked transfers
        # Small version (server does the dechunking).
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
            while length < MAX_UPLOAD_SIZE:
                data = sock.read(blocksize)
                if data == '':
                    return
                length += len(data) # Track the total read to enforce MAX_UPLOAD_SIZE.
                yield data
            raise BadRequest('Maximum size is reached')

        # Long version (do the dechunking).
        data = ''
        while length < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    chunk_length += sock.read(1)
                chunk_length = chunk_length.strip()
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length, 16)
            except Exception, e:
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                chunk_length -= len(chunk)
                length += len(chunk) # Track the total read to enforce MAX_UPLOAD_SIZE.
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        raise BadRequest('Maximum size is reached')
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
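# Note (added for clarity, not in the original module): the "long version"
# above parses raw HTTP/1.1 chunked encoding, where each chunk is
#   <hex size>[;extensions]CRLF <data> CRLF
# and a zero-size chunk terminates the body, e.g. '4\r\nWiki\r\n0\r\n\r\n'.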

    
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way."""

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.request.backend.block_size)
        return None

    def file_complete(self, file_size):
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file

class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        if self.length > 0:
            # Get the file for the current offset.
            file_size = self.sizes[self.file_index]
            while self.offset >= file_size:
                self.offset -= file_size
                self.file_index += 1
                file_size = self.sizes[self.file_index]

            # Get the block for the current position.
            self.block_index = int(self.offset / self.backend.block_size)
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
                self.block_hash = self.hashmaps[self.file_index][self.block_index]
                try:
                    self.block = self.backend.get_block(self.block_hash)
                except ItemNotExists:
                    raise ItemNotFound('Block does not exist')

            # Get the data from the block.
            bo = self.offset % self.backend.block_size
            bs = self.backend.block_size
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
                self.sizes[self.file_index] % self.backend.block_size):
                bs = self.sizes[self.file_index] % self.backend.block_size
            bl = min(self.length, bs - bo)
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration

    def next(self):
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)
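# Note (added for clarity, not in the original module): for multi-range
# requests ObjectWrapper interleaves part headers with the data, producing a
# multipart/byteranges body shaped roughly like:
#   --<boundary>\r\n
#   Content-Range: bytes 0-499/10000\r\n
#   Content-Transfer-Encoding: binary\r\n
#   \r\n
#   <data>\r\n
#   --<boundary>\r\n
#   ...
#   \r\n
#   --<boundary>--\r\n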

    
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data."""

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        check = [True for offset, length in ranges if
                    length <= 0 or length > size or
                    offset < 0 or offset >= size or
                    offset + length > size]
        if len(check) > 0:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # An HTTP date was passed in If-Range (otherwise it holds an ETag).
                last_modified = parse_http_date(if_range)
                if last_modified != meta['modified']:
                    ranges = [(0, size)]
                    ret = 200
            except ValueError:
                if if_range != meta['checksum']:
                    ranges = [(0, size)]
                    ret = 200

    if ret == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    else:
        boundary = ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response

def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset."""

    bi = int(offset / request.backend.block_size)
    bo = offset % request.backend.block_size
    bl = min(len(data), request.backend.block_size - bo)
    if bi < len(hashmap):
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
    else:
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
    return bl # Return the amount of data written.
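# Illustrative note (not part of the original module): with a hypothetical
# 4096-byte backend block size, put_object_block(request, hashmap, data, 10000)
# targets block index bi = 10000 // 4096 = 2 at intra-block offset
# bo = 10000 % 4096 = 1808, and writes at most 4096 - 1808 = 2288 bytes;
# callers are expected to loop, advancing the offset by the returned amount,
# to store longer data.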

    
def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap."""

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = backend.block_size
    for bi, hash in enumerate(hashmap):
        data = backend.get_block(hash) # Blocks come in padded.
        if bi == len(hashmap) - 1 and size % bs:
            # Trim padding only when the last block is partial.
            data = data[:size % bs]
        md5.update(data)
    return md5.hexdigest().lower()
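# Illustrative note (not part of the original module): for a 10000-byte object
# with a 4096-byte block size the hashmap has three blocks; the first two
# contribute 4096 bytes each and the last is trimmed to 10000 % 4096 = 1808
# bytes before being fed to the MD5.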

    
def simple_list_response(request, l):
    if request.serialization == 'text':
        return '\n'.join(l) + '\n'
    if request.serialization == 'xml':
        return render_to_string('items.xml', {'items': l})
    if request.serialization == 'json':
        return json.dumps(l)


from pithos.backends.util import PithosBackendPool
POOL_SIZE = 5


_pithos_backend_pool = PithosBackendPool(size=POOL_SIZE,
                                         db_module=BACKEND_DB_MODULE,
                                         db_connection=BACKEND_DB_CONNECTION,
                                         block_module=BACKEND_BLOCK_MODULE,
                                         block_path=BACKEND_BLOCK_PATH,
                                         block_umask=BACKEND_BLOCK_UMASK,
                                         queue_module=BACKEND_QUEUE_MODULE,
                                         queue_connection=BACKEND_QUEUE_CONNECTION)


def get_backend():
    backend = _pithos_backend_pool.pool_get()
    backend.default_policy['quota'] = BACKEND_QUOTA
    backend.default_policy['versioning'] = BACKEND_VERSIONING
    backend.messages = []
    return backend


def update_request_headers(request):
    # Handle URL-encoded keys and values.
    meta = dict([(k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
    for k, v in meta.iteritems():
        try:
            k.decode('ascii')
            v.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' in k or '%' in v:
            del(request.META[k])
            request.META[unquote(k)] = smart_unicode(unquote(v), strings_only=True)

def update_response_headers(request, response):
    if request.serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif request.serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if (not response.has_header('Content-Length') and
        not (response.has_header('Content-Type') and
             response['Content-Type'].startswith('multipart/byteranges'))):
        response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    meta = response.items()
    for k, v in meta:
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
            k.startswith('X-Object-') or k.startswith('Content-')):
            del(response[k])
            response[quote(k)] = quote(v, safe='/=,:@; ')

def render_fault(request, fault):
    if isinstance(fault, InternalServerError) and settings.DEBUG:
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response

def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Valid formats are 'text' and 'json'; 'xml' is also valid if 'format_allowed' is True.
    """

    if not format_allowed:
        return 'text'

    format = request.GET.get('format')
    if format == 'json':
        return 'json'
    elif format == 'xml':
        return 'xml'

    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        accept, sep, rest = item.strip().partition(';')
        if accept == 'application/json':
            return 'json'
        elif accept == 'application/xml' or accept == 'text/xml':
            return 'xml'

    return 'text'
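# Illustrative note (not part of the original module): with format_allowed=True,
# '?format=json' or an 'Accept: application/json' header selects 'json',
# '?format=xml', 'Accept: application/xml' or 'Accept: text/xml' selects 'xml',
# and anything else falls back to 'text'.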

    
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method."""

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                if user_required:
                    token = None
                    if request.method in ('HEAD', 'GET') and COOKIE_NAME in request.COOKIES:
                        cookie_value = unquote(request.COOKIES.get(COOKIE_NAME, ''))
                        if cookie_value and '|' in cookie_value:
                            token = cookie_value.split('|', 1)[1]
                    get_user(request, AUTHENTICATION_URL, AUTHENTICATION_USERS, token)
                    if getattr(request, 'user', None) is None:
                        raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault, fault:
                if fault.code >= 500:
                    logger.exception("API Fault")
                return render_fault(request, fault)
            except BaseException, e:
                logger.exception('Unexpected error: %s' % e)
                fault = InternalServerError('Unexpected error: %s' % e)
                return render_fault(request, fault)
            finally:
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator
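# Illustrative usage sketch (hypothetical view, not part of this module): API
# views are expected to wrap themselves with api_method, e.g.
#
#   @api_method('GET', format_allowed=True)
#   def container_list(request, v_account):
#       ...
#
# The decorator enforces the HTTP method, authenticates the user via Astakos,
# normalizes headers, picks the serialization format and attaches a pooled
# backend to the request, closing it when the view returns.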