Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 88dd5c4d

History | View | Annotate | Download (39.1 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52
from synnefo.lib.astakos import get_user
53

    
54
from pithos.api.faults import (
55
    Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
56
    Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
57
    RangeNotSatisfiable, InternalServerError, NotImplemented)
58
from pithos.api.short_url import encode_url
59
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
60
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
61
                                 BACKEND_BLOCK_UMASK,
62
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
63
                                 BACKEND_QUEUE_EXCHANGE,
64
                                 QUOTAHOLDER_URL, QUOTAHOLDER_TOKEN,
65
                                 BACKEND_QUOTA, BACKEND_VERSIONING,
66
                                 BACKEND_FREE_VERSIONING,
67
                                 AUTHENTICATION_URL, AUTHENTICATION_USERS,
68
                                 SERVICE_TOKEN, COOKIE_NAME, USER_INFO_URL)
69
from pithos.backends import connect_backend
70
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
71
                                  VersionNotExists)
72
from synnefo.lib.astakos import get_user_uuid, get_username
73

    
74
import logging
75
import re
76
import hashlib
77
import uuid
78
import decimal
79

    
80

    
81
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
82

    
83

    
84
class UTC(tzinfo):
    """Fixed-offset ``tzinfo`` implementation for UTC.

    Every method ignores the ``dt`` argument: the offset and the DST
    adjustment are always zero and the zone name is always ``'UTC'``.
    """

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return timedelta(0)

    def tzname(self, dt):
        """Return the time zone name."""
        return 'UTC'

    def dst(self, dt):
        """Return the daylight saving adjustment, which is always zero."""
        return timedelta(0)
93

    
94

    
95
def json_encode_decimal(obj):
    """Serialize ``decimal.Decimal`` values as strings.

    The signature and the ``TypeError`` fallback match what a JSON encoder
    ``default=`` hook looks like; no call site is visible in this part of
    the file.

    Raises:
        TypeError: if ``obj`` is not a ``decimal.Decimal``.
    """
    if isinstance(obj, decimal.Decimal):
        return str(obj)
    raise TypeError(repr(obj) + " is not JSON serializable")
99

    
100

    
101
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone.

    The datetime ``d`` is tagged as UTC without shifting its fields; any
    ``tzinfo`` already attached to it is replaced.
    """
    return d.replace(tzinfo=UTC()).isoformat()
105

    
106

    
107
def rename_meta_key(d, old, new):
    """Rename key ``old`` to ``new`` in dictionary ``d``, in place.

    Does nothing when ``old`` is not present. An existing value stored under
    ``new`` is overwritten. Renaming a key to itself leaves the dictionary
    unchanged.
    """
    if old not in d:
        return
    # pop-then-assign keeps the value when old == new; assign-then-delete
    # would drop the key in that case.
    d[new] = d.pop(old)
112

    
113

    
114
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.

    Note that a truthy ``d['last_modified']`` is replaced in the *input*
    dictionary as well; the returned dictionary is a new object.
    """
    if 'last_modified' in d and d['last_modified']:
        # isoformat() labels the value as UTC, so the timestamp has to be
        # broken down in UTC rather than in the server's local time zone.
        d['last_modified'] = isoformat(
            datetime.utcfromtimestamp(d['last_modified']))
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.items()])
125

    
126

    
127
def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings.

    For example ``'x_object_meta_foo'`` becomes ``'X-Object-Meta-Foo'``.
    """
    parts = k.replace('_', '-').split('-')
    return '-'.join([part.capitalize() for part in parts])
130

    
131

    
132
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict.

    ``prefix`` is given in header form (e.g. ``'X-Object-Meta-'``) and is
    matched against the ``HTTP_*`` keys of ``request.META``. Keys equal to
    the bare prefix are skipped. Returned keys are reformatted with
    format_header_key(); '~' characters are stripped from the values.
    """
    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for k, v in request.META.items():
        if k.startswith(meta_prefix) and len(k) > len(meta_prefix):
            # k[5:] drops the leading 'HTTP_'.
            # TODO: Document or remove '~' replacing.
            headers[format_header_key(k[5:])] = v.replace('~', '')
    return headers
138

    
139

    
140
def check_meta_headers(meta):
    """Validate the number and size of user-supplied metadata headers.

    Raises:
        BadRequest: if there are more than 90 headers, a header name is
            longer than 128 characters or a value is longer than 256.
    """
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for k, v in meta.items():
        if len(k) > 128:
            raise BadRequest('Header name too large.')
        if len(v) > 256:
            raise BadRequest('Header value too large.')
148

    
149

    
150
def get_account_headers(request):
    """Extract account metadata and group definitions from the request.

    Returns:
        A ``(meta, groups)`` tuple: ``meta`` holds the X-Account-Meta-*
        headers, ``groups`` maps each lower-cased group name to the list of
        its non-empty, comma-separated members (spaces removed).

    Raises:
        BadRequest: if the metadata fails check_meta_headers() or a group
            name contains '-' or '_'.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)
    group_prefix = 'X-Account-Group-'
    groups = {}
    for k, v in get_header_prefix(request, group_prefix).items():
        n = k[len(group_prefix):].lower()
        if '-' in n or '_' in n:
            raise BadRequest('Bad characters in group name')
        groups[n] = [m for m in v.replace(' ', '').split(',') if m]
    return meta, groups
162

    
163

    
164
def put_account_headers(response, meta, groups, policy):
    """Write account information to the response headers.

    Sets the container count and bytes used (when present in ``meta``),
    Last-Modified, every X-Account-Meta-* entry, the optional until
    timestamp, one X-Account-Group-* header per group (members joined by
    commas) and one X-Account-Policy-* header per policy item.
    ``meta['modified']`` is required.
    """
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k, v in meta.items():
        if k.startswith('X-Account-Meta-'):
            response[smart_str(k, strings_only=True)] = \
                smart_str(v, strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(
            int(meta['until_timestamp']))
    for k, v in groups.items():
        k = smart_str(k, strings_only=True)
        k = format_header_key('X-Account-Group-' + k)
        response[k] = smart_str(','.join(v), strings_only=True)
    for k, v in policy.items():
        header = format_header_key('X-Account-Policy-' + k)
        response[smart_str(header, strings_only=True)] = \
            smart_str(v, strings_only=True)
183

    
184

    
185
def get_container_headers(request):
    """Extract container metadata and policy from the request.

    Returns:
        A ``(meta, policy)`` tuple: ``meta`` holds the X-Container-Meta-*
        headers, ``policy`` maps each lower-cased X-Container-Policy-* name
        (prefix removed) to its value with spaces removed.

    Raises:
        BadRequest: if the metadata fails check_meta_headers().
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)
    policy_prefix = 'X-Container-Policy-'
    policy = {}
    for k, v in get_header_prefix(request, policy_prefix).items():
        policy[k[len(policy_prefix):].lower()] = v.replace(' ', '')
    return meta, policy
190

    
191

    
192
def put_container_headers(request, response, meta, policy):
    """Write container information to the response headers.

    Sets the object count and bytes used (when present in ``meta``),
    Last-Modified, every X-Container-Meta-* entry, the list of object meta
    keys in use (X-Container-Object-Meta, without the 'X-Object-Meta-'
    prefix), the backend block size and hash algorithm, the optional until
    timestamp and one X-Container-Policy-* header per policy item.
    ``meta['modified']`` and ``meta['object_meta']`` are required.
    """
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k, v in meta.items():
        if k.startswith('X-Container-Meta-'):
            response[smart_str(k, strings_only=True)] = \
                smart_str(v, strings_only=True)
    object_meta_prefix = 'X-Object-Meta-'
    object_meta_keys = [smart_str(x, strings_only=True)
                        for x in meta['object_meta']
                        if x.startswith(object_meta_prefix)]
    response['X-Container-Object-Meta'] = ','.join(
        [x[len(object_meta_prefix):] for x in object_meta_keys])
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(
            int(meta['until_timestamp']))
    for k, v in policy.items():
        header = format_header_key('X-Container-Policy-' + k)
        response[smart_str(header, strings_only=True)] = \
            smart_str(v, strings_only=True)
211

    
212

    
213
def get_object_headers(request):
    """Extract object-related information from the request headers.

    Returns:
        A ``(content_type, meta, permissions, public)`` tuple.
        ``content_type`` is the CONTENT_TYPE value or None. ``meta`` holds
        the X-Object-Meta-* headers plus any non-empty Content-Encoding,
        Content-Disposition and X-Object-Manifest values. The last two
        items are the results of get_sharing() and get_public().

    Raises:
        BadRequest: propagated from check_meta_headers(), get_sharing() or
            get_public().
    """
    content_type = request.META.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)
    for meta_key, header in (
            ('Content-Encoding', 'HTTP_CONTENT_ENCODING'),
            ('Content-Disposition', 'HTTP_CONTENT_DISPOSITION'),
            ('X-Object-Manifest', 'HTTP_X_OBJECT_MANIFEST')):
        if request.META.get(header):
            meta[meta_key] = request.META[header]
    return content_type, meta, get_sharing(request), get_public(request)
224

    
225

    
226
def put_object_headers(response, meta, restricted=False):
    """Write object information to the response headers.

    ETag, Content-Length, Content-Type and Last-Modified are always set.
    With ``restricted`` set, only Content-Encoding and Content-Disposition
    are added on top of those. Otherwise the hash, UUID, modifier (resolved
    through retrieve_username()), version, version timestamp, every
    X-Object-Meta-* entry and the manifest/sharing/public headers present
    in ``meta`` are exposed as well.
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if restricted:
        optional = ('Content-Encoding', 'Content-Disposition')
    else:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        modified_by = retrieve_username(meta['modified_by'])
        response['X-Object-Modified-By'] = smart_str(
            modified_by, strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(
            int(meta['version_timestamp']))
        for k, v in meta.items():
            if k.startswith('X-Object-Meta-'):
                response[smart_str(k, strings_only=True)] = \
                    smart_str(v, strings_only=True)
        optional = (
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
            'X-Object-Public')
    for k in optional:
        if k in meta:
            response[k] = smart_str(meta[k], strings_only=True)
253

    
254

    
255
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The manifest value is read as ``container/prefix``. ``meta['bytes']``
    becomes the sum of the sizes of the objects listed under that prefix
    and ``meta['checksum']`` the MD5 of their concatenated checksums.
    This is best effort: on any error ``meta`` is left untouched.
    """
    if 'X-Object-Manifest' not in meta:
        return

    checksums = []
    total_bytes = 0
    try:
        src_container, src_name = split_container_object_string(
            '/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(
            request.user_uniq, v_account,
            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(
                request.user_uniq, v_account, src_container, x[0],
                'pithos', x[1])
            checksums.append(src_meta['checksum'])
            total_bytes += src_meta['bytes']
        etag = ''.join(checksums)
    except Exception:
        # Ignore errors (but let KeyboardInterrupt/SystemExit through).
        return
    meta['bytes'] = total_bytes
    md5 = hashlib.md5()
    md5.update(etag)
    meta['checksum'] = md5.hexdigest().lower()
279

    
280
def is_uuid(str):
    """Return True if ``str`` can be parsed by ``uuid.UUID``.

    Values that are not strings at all (e.g. None or a number) yield False
    instead of raising.
    """
    # NOTE: the parameter name shadows the builtin; it is kept so keyword
    # callers keep working.
    try:
        uuid.UUID(str)
    except (ValueError, TypeError, AttributeError):
        return False
    return True
287

    
288
def retrieve_username(uuid):
    """Resolve a user UUID to a username via get_username().

    On any lookup failure the ``uuid`` argument is returned unchanged.
    """
    # NOTE: the parameter shadows the ``uuid`` module inside this function;
    # the name is kept for backward compatibility.
    try:
        return get_username(
            SERVICE_TOKEN, uuid, USER_INFO_URL, AUTHENTICATION_USERS)
    except Exception:
        # if it fails just leave the metadata intact
        return uuid
295

    
296
def retrieve_uuid(username):
    """Resolve a username to a user UUID via get_user_uuid().

    A value that already parses as a UUID is returned as is.

    Raises:
        ItemNotExists: if the lookup fails with an exception whose last
            argument is 404.
        Exception: any other lookup error is re-raised unchanged.
    """
    if is_uuid(username):
        return username

    try:
        return get_user_uuid(
            SERVICE_TOKEN, username, USER_INFO_URL, AUTHENTICATION_USERS)
    except Exception as e:
        if e.args and e.args[-1] == 404:
            raise ItemNotExists(username)
        raise
309

    
310
def replace_permissions_username(holder):
    """Replace the username in a permission holder with its UUID.

    ``holder`` is either a plain account or an ``account:group`` pair; for
    a pair only the account part is translated. The wildcard ``'*'`` is
    not an account and is returned unchanged.

    Raises:
        ItemNotExists: propagated from retrieve_uuid().
    """
    if holder == '*':
        return holder
    try:
        # check first for a group permission
        account, group = holder.split(':')
    except ValueError:
        # Not exactly one ':' separator: treat the whole string as a user.
        return retrieve_uuid(holder)
    return ':'.join([retrieve_uuid(account), group])
318

    
319
def replace_permissions_uuid(holder):
    """Replace the UUID in a permission holder with its username.

    ``holder`` is either a plain account or an ``account:group`` pair; for
    a pair only the account part is translated. The wildcard ``'*'`` is
    not an account and is returned unchanged without a lookup.
    """
    if holder == '*':
        return holder
    try:
        # check first for a group permission
        account, group = holder.split(':')
    except ValueError:
        # Not exactly one ':' separator: treat the whole string as a user.
        return retrieve_username(holder)
    return ':'.join([retrieve_username(account), group])
327

    
328
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add the sharing-related entries to ``meta``.

    ``permissions`` is None or an ``(allowed, perm_path, perms)`` tuple.
    Nothing happens when it is None or ``perms`` is empty. Otherwise:

    * the holders in ``perms['read']`` and ``perms['write']`` are passed
      through replace_permissions_uuid() (``perms`` is updated in place);
    * ``meta['X-Object-Sharing']`` is set to ``read=...; write=...``;
    * ``meta['X-Object-Shared-By']`` is set when the permissions come from
      a path other than this object's own;
    * ``meta['X-Object-Allowed-To']`` is set when the requesting user is
      not the account owner.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return

    for access in ('read', 'write'):
        perms[access] = [replace_permissions_uuid(x)
                         for x in perms.get(access, [])]

    ret = []
    for access in ('read', 'write'):
        holders = ','.join(perms[access])
        if holders:
            ret.append(access + '=' + holders)
    meta['X-Object-Sharing'] = '; '.join(ret)
    if '/'.join((v_account, v_container, v_object)) != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
352

    
353

    
354
def update_public_meta(public, meta):
    """Set ``meta['X-Object-Public']`` to the object's public URL path.

    Does nothing when ``public`` is falsy. The path is '/public/' followed
    by encode_url(public).
    """
    if not public:
        return
    meta['X-Object-Public'] = '/public/' + encode_url(public)
358

    
359

    
360
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.

    Headers that parse_http_date_safe() turns into None are ignored.

    Raises:
        NotModified: if If-Modified-Since is set and the resource has not
            been modified after that date.
        PreconditionFailed: if If-Unmodified-Since is set and the resource
            has been modified after that date.
    """
    if 'modified' not in meta:
        return  # TODO: Always return?

    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if_modified_since = parse_http_date_safe(if_modified_since)
    if (if_modified_since is not None and
            int(meta['modified']) <= if_modified_since):
        raise NotModified('Resource has not been modified')

    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
    if (if_unmodified_since is not None and
            int(meta['modified']) > if_unmodified_since):
        raise PreconditionFailed('Resource has been modified')
377

    
378

    
379
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    An empty ``meta['checksum']`` is treated as "resource does not exist".

    Raises:
        PreconditionFailed: if If-Match is set and the resource does not
            exist or its ETag is not listed; or if If-None-Match matches
            for a method other than HEAD/GET.
        NotModified: if If-None-Match matches for a HEAD or GET request.
    """
    etag = meta['checksum']
    if not etag:
        etag = None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if (if_match != '*' and
                etag not in [x.lower() for x in parse_etags(if_match)]):
            raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        if etag is not None:
            if (if_none_match == '*' or
                    etag in [x.lower() for x in parse_etags(if_none_match)]):
                # TODO: Continue if an If-Modified-Since header is present.
                if request.method in ('HEAD', 'GET'):
                    raise NotModified('Resource ETag matches')
                raise PreconditionFailed('Resource exists or ETag matches')
402

    
403

    
404
def split_container_object_string(s):
    """Split a ``/container/object`` path into ``(container, object)``.

    The object part is everything after the second slash and may itself
    contain slashes.

    Raises:
        ValueError: if ``s`` is empty, does not start with '/', has no
            second '/', or has nothing after the second '/'.
    """
    if not len(s) > 0 or s[0] != '/':
        raise ValueError
    s = s[1:]
    pos = s.find('/')
    if pos == -1 or pos == len(s) - 1:
        raise ValueError
    return s[:pos], s[(pos + 1):]
412

    
413

    
414
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False, delimiter=None):
    """Copy or move an object.

    Content type, metadata, sharing and public settings for the destination
    are taken from the request headers. When the 'ignore_content_type'
    query parameter is present, CONTENT_TYPE is removed from
    ``request.META`` first. X-Source-Version is only passed on for copies.

    Returns:
        The version id returned by the backend copy/move call.

    Raises:
        Forbidden: if the backend reports NotAllowedError.
        ItemNotFound: if the container, object or version does not exist.
        BadRequest: if the backend raises ValueError (invalid sharing).
        RequestEntityTooLarge: if the backend reports QuotaError.
    """
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del(request.META['CONTENT_TYPE'])
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            version_id = request.backend.move_object(
                request.user_uniq, src_account, src_container, src_name,
                dest_account, dest_container, dest_name,
                content_type, 'pithos', meta, False, permissions, delimiter)
        else:
            version_id = request.backend.copy_object(
                request.user_uniq, src_account, src_container, src_name,
                dest_account, dest_container, dest_name,
                content_type, 'pithos', meta, False, permissions,
                src_version, delimiter)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (ItemNotExists, VersionNotExists):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')
    if public is not None:
        try:
            request.backend.update_object_public(
                request.user_uniq, dest_account, dest_container, dest_name,
                public)
        except NotAllowedError:
            raise Forbidden('Not allowed')
        except ItemNotExists:
            raise ItemNotFound('Object does not exist')
    return version_id
448

    
449

    
450
def get_int_parameter(p):
    """Convert ``p`` to a non-negative integer.

    Returns None when ``p`` is None, cannot be converted by ``int()`` or is
    negative.
    """
    if p is None:
        return None
    try:
        p = int(p)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric values such as lists.
        return None
    if p < 0:
        return None
    return p
459

    
460

    
461
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises:
        LengthRequired: if the header is missing, not an integer or
            negative.
    """
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if content_length is None:
        raise LengthRequired('Missing or invalid Content-Length header')
    return content_length
466

    
467

    
468
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    ``size`` is the total size of the resource; it is used to resolve
    open-ended (``N-``) and suffix (``-N``) ranges. No bounds checking is
    done here, so a suffix longer than ``size`` yields a negative offset.
    """
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compiled once, outside the loop.
    pattern = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ret = []
    for r in ranges[6:].split(','):
        m = pattern.match(r.strip())
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset == '':
            # Suffix range: the last ``upto`` bytes.
            length = int(upto)
            ret.append((size - length, length))
            continue

        offset = int(offset)
        if upto == '':
            # Open-ended range: from ``offset`` to the end.
            ret.append((offset, size - offset))
            continue

        upto = int(upto)
        if offset > upto:
            return None
        ret.append((offset, upto - offset + 1))

    return ret
504

    
505

    
506
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None

    p = re.compile(
        r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
    m = p.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None

    offset = int(m.group('offset'))
    upto = m.group('upto')
    upto = int(upto) if upto != '' else None
    total = m.group('total')
    total = int(total) if total != '*' else None

    # Reject ranges that are inverted or reach past the declared total.
    if upto is not None and offset > upto:
        return None
    if total is not None and offset >= total:
        return None
    if total is not None and upto is not None and upto >= total:
        return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
545

    
546

    
547
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    The header holds ';'-separated ``read=...`` and ``write=...`` parts,
    each a comma-separated list of holders. Spaces and '~' characters are
    dropped, holders are lower-cased and de-duplicated, and a list that
    contains '*' collapses to ``['*']``.

    Returns:
        None if the header is absent, an empty dict if it is blank, else a
        dict with 'read' and/or 'write' lists in which usernames have been
        replaced through replace_permissions_username(). A holder listed
        under both is kept only in 'write'.

    Raises BadRequest on error.
    """
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '').replace(' ', '')

    ret = {}
    if permissions == '':
        return ret
    for perm in permissions.split(';'):
        if perm.startswith('read='):
            access = 'read'
        elif perm.startswith('write='):
            access = 'write'
        else:
            raise BadRequest(
                'Bad X-Object-Sharing header value: missing prefix')
        holders = set([v.lower() for v in perm[len(access) + 1:].split(',')])
        holders.discard('')
        if '*' in holders:
            holders = set(['*'])
        if len(holders) == 0:
            raise BadRequest(
                'Bad X-Object-Sharing header value: invalid length')
        ret[access] = list(holders)

    # replace username with uuid
    try:
        ret['read'] = \
            [replace_permissions_username(x) for x in ret.get('read', [])]
        ret['write'] = \
            [replace_permissions_username(x) for x in ret.get('write', [])]
    except ItemNotExists as e:
        raise BadRequest(
            'Bad X-Object-Sharing header value: unknown account: %s' % e)

    # Keep duplicates only in write list.
    dups = [x for x in ret.get('read', [])
            if x in ret.get('write', []) and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret
609

    
610

    
611
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None if the header is absent, True for 'true' and False for
    'false' or an empty value (case-insensitive, spaces ignored).

    Raises BadRequest on error.
    """
    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    public = public.replace(' ', '').lower()
    if public == 'true':
        return True
    if public in ('false', ''):
        return False
    raise BadRequest('Bad X-Object-Public header value')
627

    
628

    
629
def raw_input_socket(request):
    """Return the socket for reading the rest of the request.

    Under mod_python this is ``request._req``; otherwise the 'wsgi.input'
    entry of ``request.environ``.

    Raises:
        NotImplemented: the fault imported from pithos.api.faults (not the
            builtin constant), if neither source is available.
    """
    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    if 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise NotImplemented('Unknown server software')
638

    
639
# Upper bound, in bytes, on the amount of request data that
# socket_read_iterator() is meant to read.
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB
640

    
641

    
642
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Raises:
        BadRequest: if more than MAX_UPLOAD_SIZE bytes are sent, a chunk
            size line is malformed, or the body ends prematurely.
        NotImplemented (fault): propagated from raw_input_socket().
    """
    sock = raw_input_socket(request)
    if length < 0:  # Chunked transfers
        # ``length`` stays negative in this mode, so the size limit is
        # enforced against a separate running total.
        received = 0
        server_software = request.META.get('SERVER_SOFTWARE', '')

        # Small version (server does the dechunking).
        if (request.environ.get('mod_wsgi.input_chunked', None) or
                server_software.startswith('gunicorn')):
            while True:
                data = sock.read(blocksize)
                if not data:
                    return
                received += len(data)
                if received > MAX_UPLOAD_SIZE:
                    raise BadRequest('Maximum size is reached')
                yield data

        # Long version (do the dechunking).
        data = ''
        while True:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    char = sock.read(1)
                    if not char:
                        # EOF in the middle of the size line.
                        raise BadRequest('Bad chunk size')
                    chunk_length += char
            # Drop any chunk extension.
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                # int() tolerates the surrounding whitespace/CRLF.
                chunk_length = int(chunk_length, 16)
            except (TypeError, ValueError):
                # TODO: Change to something more appropriate.
                raise BadRequest('Bad chunk size')
            if chunk_length < 0:
                raise BadRequest('Bad chunk size')
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                if not chunk:
                    # Premature EOF; without this check the loop never ends.
                    raise BadRequest()
                chunk_length -= len(chunk)
                received += len(chunk)
                if received > MAX_UPLOAD_SIZE:
                    raise BadRequest('Maximum size is reached')
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2)  # CRLF
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
706

    
707

    
708
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Incoming data is buffered, cut into blocks of ``backend.block_size``
    bytes and stored through ``backend.put_block()``. The resulting block
    hashes are collected in ``file.hashmap`` and the MD5 of the whole
    content ends up in ``file.etag``.
    """

    def __init__(self, request=None):
        """Bind the handler to ``request`` and its ``backend`` attribute."""
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store the first ``length`` buffered bytes as one block.

        Does nothing if fewer than ``length`` bytes are buffered.
        """
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the buffer, digest and UploadedFile for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(
            name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer ``raw_data`` and store every complete block.

        Always returns None.
        """
        self.data += raw_data
        self.file.size += len(raw_data)
        block_size = self.backend.block_size
        # A chunk may carry more than one block; drain them all so that the
        # block stored in file_complete() is never larger than block_size.
        while block_size > 0 and len(self.data) >= block_size:
            self.put_data(block_size)
        return None

    def file_complete(self, file_size):
        """Store the remaining partial block and return the file object."""
        remaining = len(self.data)
        if remaining > 0:
            self.put_data(remaining)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
742

    
743

    
744
class ObjectWrapper(object):
    """Iterator that yields an object's data, at most one block per step.

    The data is read from the backend according to the (offset, length)
    pairs in ``ranges``. With a single range only data is produced; with
    several ranges each one is preceded by a multipart part header and a
    closing boundary is emitted at the end.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        # Position inside the files and the currently cached block.
        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        # -1 means that no part header has been emitted yet.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of the current range.

        Raises StopIteration once the current range is exhausted.
        """
        if self.length <= 0:
            raise StopIteration

        block_size = self.backend.block_size

        # Move to the file that contains the current offset; the offset
        # becomes relative to that file.
        while self.offset >= self.sizes[self.file_index]:
            self.offset -= self.sizes[self.file_index]
            self.file_index += 1

        current_size = self.sizes[self.file_index]
        file_hashmap = self.hashmaps[self.file_index]

        # Fetch the block for the current position unless it is cached.
        self.block_index = int(self.offset / block_size)
        wanted_hash = file_hashmap[self.block_index]
        if self.block_hash != wanted_hash:
            self.block_hash = wanted_hash
            try:
                self.block = self.backend.get_block(wanted_hash)
            except ItemNotExists:
                raise ItemNotFound('Block does not exist')

        # Cut the requested piece out of the block; the last block of a
        # file only counts up to the file's remainder, if there is one.
        start = self.offset % block_size
        usable = block_size
        remainder = current_size % block_size
        if self.block_index == len(file_hashmap) - 1 and remainder:
            usable = remainder
        count = min(self.length, usable - start)
        piece = self.block[start:start + count]
        self.offset += count
        self.length -= count
        return piece

    def next(self):
        """Return the next piece of data, part header or closing footer."""
        total_ranges = len(self.ranges)
        if total_ranges == 1:
            return self.part_iterator()
        if self.range_index == total_ranges:
            raise StopIteration

        if self.range_index != -1:
            try:
                return self.part_iterator()
            except StopIteration:
                # Current range is done; fall through to the next part.
                pass

        self.range_index += 1
        if self.range_index >= total_ranges:
            # Footer.
            return '\r\n'.join(['', '--' + self.boundary + '--', ''])

        # Part header.
        self.offset, self.length = self.ranges[self.range_index]
        self.file_index = 0
        lines = [''] if self.range_index > 0 else []
        lines.append('--' + self.boundary)
        lines.append('Content-Range: bytes %d-%d/%d' % (
            self.offset, self.offset + self.length - 1, self.size))
        lines.append('Content-Transfer-Encoding: binary')
        lines.append('')
        lines.append('')
        return '\r\n'.join(lines)
833

    
834

    
835
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Build the HttpResponse that streams the object's data.

    Replies with 200 and the whole object when no range is requested (or
    when an If-Range precondition does not match), otherwise with 206 and
    either a single range or a multipart/byteranges body.
    Raises RangeNotSatisfiable if any requested range falls outside the
    object.
    """

    # Range handling.
    total = sum(sizes)
    ranges = get_range(request, total)
    if ranges is None:
        ranges = [(0, total)]
        status = 200
    else:
        out_of_bounds = [
            True for offset, length in ranges
            if (length <= 0 or length > total or
                offset < 0 or offset >= total or
                offset + length > total)]
        if out_of_bounds:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        status = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # If-Range may carry a modification time...
                stale = parse_http_date(if_range) != meta['modified']
            except ValueError:
                # ...or, when it is not a date, the object's checksum.
                stale = if_range != meta['checksum']
            if stale:
                ranges = [(0, total)]
                status = 200

    boundary = ''
    if status == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=status)
    put_object_headers(response, meta, public)
    if status != 206:
        return response

    if len(ranges) == 1:
        offset, length = ranges[0]
        # Update with the correct length.
        response['Content-Length'] = length
        response['Content-Range'] = 'bytes %d-%d/%d' % (
            offset, offset + length - 1, total)
    else:
        del response['Content-Length']
        response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
            boundary,)
    return response
884

    
885

    
886
def put_object_block(request, hashmap, data, offset):
    """Write as much of ``data`` as fits in the block holding ``offset``.

    An existing block is updated in place in ``hashmap``; a block beyond
    the end of the hashmap is appended, zero-filled up to the offset
    inside the block. Returns the amount of data written.
    """

    backend = request.backend
    block_index = int(offset / backend.block_size)
    block_offset = offset % backend.block_size
    written = min(len(data), backend.block_size - block_offset)
    payload = data[:written]
    if block_index >= len(hashmap):
        hashmap.append(backend.put_block(('\x00' * block_offset) + payload))
    else:
        hashmap[block_index] = backend.update_block(
            hashmap[block_index], payload, block_offset)
    return written
897

    
898

    
899
def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    ``hashmap`` lists the block hashes of an object that is ``size``
    bytes long. Returns the lowercase hexadecimal MD5 digest of the
    object's data.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = backend.block_size
    last = len(hashmap) - 1
    for bi, block_hash in enumerate(hashmap):
        data = backend.get_block(block_hash)  # Blocks come in padded.
        if bi == last:
            # Keep only the bytes that belong to the object. Using
            # size % bs here would discard the whole block whenever the
            # size is an exact multiple of the block size.
            data = data[:max(size - bi * bs, 0)]
        md5.update(data)
    return md5.hexdigest().lower()
911

    
912

    
913
def simple_list_response(request, l):
    """Serialize the list ``l`` according to ``request.serialization``.

    'text' gives one item per line, 'xml' renders the 'items.xml'
    template and 'json' dumps the list; any other value gives None.
    """
    serialization = request.serialization
    if serialization == 'text':
        return '\n'.join(l) + '\n'
    elif serialization == 'xml':
        return render_to_string('items.xml', {'items': l})
    elif serialization == 'json':
        return json.dumps(l)
    return None
920

    
921

    
922
from pithos.backends.util import PithosBackendPool
923
# Size handed to the backend pool created below.
POOL_SIZE = 5


# Module-level pool of backends, configured from the BACKEND_* and
# QUOTAHOLDER_* settings; get_backend() takes its backends from here.
_pithos_backend_pool = PithosBackendPool(
    size=POOL_SIZE,
    db_module=BACKEND_DB_MODULE,
    db_connection=BACKEND_DB_CONNECTION,
    block_module=BACKEND_BLOCK_MODULE,
    block_path=BACKEND_BLOCK_PATH,
    block_umask=BACKEND_BLOCK_UMASK,
    queue_module=BACKEND_QUEUE_MODULE,
    queue_hosts=BACKEND_QUEUE_HOSTS,
    queue_exchange=BACKEND_QUEUE_EXCHANGE,
    quotaholder_url=QUOTAHOLDER_URL,
    quotaholder_token=QUOTAHOLDER_TOKEN,
    free_versioning=BACKEND_FREE_VERSIONING)
938

    
939

    
940
def get_backend():
    """Take a backend from the pool and reset its per-request state.

    The default quota and versioning policies are set from the
    BACKEND_QUOTA and BACKEND_VERSIONING settings and the message list
    is emptied before the backend is returned.
    """
    pooled = _pithos_backend_pool.pool_get()
    policy_defaults = (
        ('quota', BACKEND_QUOTA),
        ('versioning', BACKEND_VERSIONING),
    )
    for key, value in policy_defaults:
        pooled.default_policy[key] = value
    pooled.messages = []
    return pooled
946

    
947

    
948
def update_request_headers(request):
    """Validate the HTTP_* entries of request.META and URL-decode them.

    Raises BadRequest when a header name or value is not ASCII. Entries
    containing '%' are replaced by their unquoted name and value.
    """
    # Work on a snapshot, since request.META is modified below.
    headers = dict(
        (name, value) for name, value in request.META.iteritems()
        if name.startswith('HTTP_'))
    for name, value in headers.iteritems():
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' not in name and '%' not in value:
            continue
        # Handle URL-encoded keys and values.
        del request.META[name]
        request.META[unquote(name)] = smart_unicode(
            unquote(value), strings_only=True)
962

    
963

    
964
def update_response_headers(request, response):
    """Finalize the Content-Type, Content-Length and encoded headers.

    The content type follows request.serialization (plain text is used
    only when no type is set), a missing Content-Length is computed for
    non-multipart responses, and X-Account-*, X-Container-*, X-Object-*
    and Content-* headers are URL-encoded.
    """
    serialization = request.serialization
    if serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if not response.has_header('Content-Length'):
        is_multipart = (
            response.has_header('Content-Type') and
            response['Content-Type'].startswith('multipart/byteranges'))
        if not is_multipart:
            response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    encoded_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    for name, value in response.items():
        if name.startswith(encoded_prefixes):
            del response[name]
            response[quote(name)] = quote(value, safe='/=,:@; ')
984

    
985

    
986
def render_fault(request, fault):
    """Return a plain-text HttpResponse describing ``fault``.

    The body holds the fault's message followed by its details, if any,
    and the status is the fault's code. For an InternalServerError in
    DEBUG mode the details are replaced by the current traceback.
    """
    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # format_exc() takes an optional traceback depth limit, not an
        # exception; it always formats the exception being handled.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
997

    
998

    
999
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    The result is always 'text' unless 'format_allowed' is True, in
    which case 'json' or 'xml' may be selected by the 'format' query
    parameter or, failing that, by the first matching media type in the
    Accept header.
    """

    if not format_allowed:
        return 'text'

    # An explicit query parameter wins over the Accept header.
    requested = request.GET.get('format')
    for name in ('json', 'xml'):
        if requested == name:
            return name

    accepted = {
        'application/json': 'json',
        'application/xml': 'xml',
        'text/xml': 'xml',
    }
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        # Drop any parameters (such as ';q=0.9') after the media type.
        media_type = item.strip().partition(';')[0]
        if media_type in accepted:
            return accepted[media_type]

    return 'text'
1022

    
1023
class User(unicode):
    """Unicode string subclass whose instances can carry extra attributes.

    api_method wraps request.user_uniq in it so that a 'uuid' attribute
    can be attached to the value.
    """
1025

    
1026
def get_pithos_usage(usage):
    """Return the 'pithos+.diskspace' entry of ``usage``, if present.

    ``usage`` is an iterable of dict-like entries; None or an empty
    value is accepted and treated as having no entries. Returns the
    first entry whose 'name' is 'pithos+.diskspace', or None.
    """
    # The caller passes request.user.get('usage'), which may be None.
    for entry in usage or ():
        if entry.get('name') == 'pithos+.diskspace':
            return entry
    return None
1030

    
1031
def api_method(http_method=None, format_allowed=False, user_required=True,
               request_usage=False):
    """Decorator function for views that implement an API method.

    The wrapped view is called only after the request has been checked
    and prepared: the HTTP method is verified against 'http_method' (if
    given), the user is authenticated when 'user_required' is True, the
    container and object name lengths are limited, the headers are
    decoded and 'request.serialization' / 'request.backend' are set.
    'format_allowed' is passed on to request_serialization().
    'request_usage' is accepted but not used by this decorator.

    Any Fault or other exception is turned into a fault response, and
    the backend, if one was set on the request, is always closed.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                if user_required:
                    token = None
                    # For read-only methods the token may come from the
                    # cookie, whose value has the form 'account|token'.
                    if (request.method in ('HEAD', 'GET') and
                            COOKIE_NAME in request.COOKIES):
                        cookie_value = unquote(
                            request.COOKIES.get(COOKIE_NAME, ''))
                        account, sep, token = cookie_value.partition('|')
                    get_user(request,
                             AUTHENTICATION_URL,
                             AUTHENTICATION_USERS,
                             token,
                             user_required)
                    if getattr(request, 'user', None) is None:
                        raise Unauthorized('Access denied')
                    assert getattr(request, 'user_uniq', None) is not None
                    # Wrap the identifier so that attributes can be set.
                    request.user_uniq = User(request.user_uniq)
                    request.user_uniq.uuid = request.user.get('uuid')
                    request.user_usage = get_pithos_usage(
                        request.user.get('usage'))

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(
                    request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                if fault.code >= 500:
                    logger.exception("API Fault")
                return render_fault(request, fault)
            except BaseException as e:
                # Last-resort boundary: report anything else as an
                # internal server error instead of letting it escape.
                logger.exception('Unexpected error: %s', e)
                fault = InternalServerError('Unexpected error: %s' % e)
                return render_fault(request, fault)
            finally:
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator