Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 469d0997

History | View | Annotate | Download (41 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52
from synnefo.lib.astakos import get_user
53

    
54
from pithos.api.faults import (
55
    Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
56
    Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
57
    RangeNotSatisfiable, InternalServerError, NotImplemented)
58
from pithos.api.short_url import encode_url
59
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
60
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
61
                                 BACKEND_BLOCK_UMASK,
62
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
63
                                 BACKEND_QUEUE_EXCHANGE,
64
                                 QUOTAHOLDER_URL, QUOTAHOLDER_TOKEN,
65
                                 BACKEND_QUOTA, BACKEND_VERSIONING,
66
                                 BACKEND_FREE_VERSIONING,
67
                                 AUTHENTICATION_URL, AUTHENTICATION_USERS,
68
                                 COOKIE_NAME, USER_CATALOG_URL,
69
                                 RADOS_STORAGE, RADOS_POOL_BLOCKS,
70
                                 RADOS_POOL_MAPS, TRANSLATE_UUIDS)
71
from pithos.backends import connect_backend
72
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
73
                                  VersionNotExists)
74
from synnefo.lib.astakos import (get_user_uuid, get_displayname,
75
                                 get_uuids, get_displaynames)
76

    
77
import logging
78
import re
79
import hashlib
80
import uuid
81
import decimal
82

    
83
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
84

    
85

    
86
class UTC(tzinfo):
    """Fixed-offset tzinfo describing UTC (zero offset, no DST)."""

    # Shared zero offset returned by both utcoffset() and dst().
    _ZERO = timedelta(0)

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return self._ZERO

    def tzname(self, dt):
        """Return the name of the zone."""
        return 'UTC'

    def dst(self, dt):
        """Return the daylight-saving adjustment, which is always zero."""
        return self._ZERO
95

    
96

    
97
def json_encode_decimal(obj):
    """Return the string form of a ``decimal.Decimal``.

    Raises TypeError for any other kind of object.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
101

    
102

    
103
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone.

    The datetime's tzinfo is replaced by UTC (no conversion of the clock
    fields takes place) before it is formatted.
    """
    utc_stamped = d.replace(tzinfo=UTC())
    return utc_stamped.isoformat()
107

    
108

    
109
def rename_meta_key(d, old, new):
    """Move the value stored under ``old`` to the key ``new``, in place.

    Does nothing when ``old`` is not in ``d``. An existing ``new`` entry is
    overwritten.
    """
    if old not in d:
        return
    # Pop before assigning: assigning first and deleting afterwards would
    # drop the entry altogether when ``old`` and ``new`` are the same key.
    d[new] = d.pop(old)
114

    
115

    
116
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    A truthy 'last_modified' value is taken as an epoch timestamp and is
    replaced, in ``d`` itself, by its ISO8601 representation.

    Returns a new dict with the converted keys.
    """

    if d.get('last_modified'):
        # isoformat() labels the value as UTC without converting it, so the
        # datetime must already be expressed in UTC. fromtimestamp() would
        # give local time and a wrong result on hosts not running in UTC.
        d['last_modified'] = isoformat(
            datetime.utcfromtimestamp(d['last_modified']))
    return dict((k.lower().replace('-', '_'), v) for k, v in d.items())
127

    
128

    
129
def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    words = k.replace('_', '-').split('-')
    return '-'.join(word.capitalize() for word in words)
132

    
133

    
134
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict.

    Only ``request.META`` entries whose name starts with 'HTTP_' plus the
    upper-cased, underscore-separated prefix, and is longer than it, are
    kept. Keys are reformatted with format_header_key() after dropping the
    leading 'HTTP_'; '~' characters are stripped from the values.
    """

    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for name, value in request.META.items():
        if not name.startswith(meta_prefix):
            continue
        if len(name) <= len(meta_prefix):
            continue
        # TODO: Document or remove '~' replacing.
        headers[format_header_key(name[5:])] = value.replace('~', '')
    return headers
140

    
141

    
142
def check_meta_headers(meta):
    """Validate the number and size of metadata headers.

    Raises BadRequest when there are more than 90 entries, when a name is
    longer than 128 characters or when a value is longer than 256.
    """
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for name, value in meta.items():
        name_too_long = len(name) > 128
        if name_too_long:
            raise BadRequest('Header name too large.')
        value_too_long = len(value) > 256
        if value_too_long:
            raise BadRequest('Header value too large.')
150

    
151

    
152
def get_account_headers(request):
    """Extract account metadata and group definitions from the request.

    Returns a ``(meta, groups)`` tuple: ``meta`` holds the validated
    'X-Account-Meta-*' headers and ``groups`` maps each lower-cased group
    name to the list of its non-empty, comma-separated members.

    Raises BadRequest when a group name contains '-' or '_'.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)
    groups = {}
    group_headers = get_header_prefix(request, 'X-Account-Group-')
    for header, value in group_headers.items():
        # Drop the 16-character 'X-Account-Group-' prefix.
        name = header[16:].lower()
        if '-' in name or '_' in name:
            raise BadRequest('Bad characters in group name')
        members = value.replace(' ', '').split(',')
        groups[name] = [member for member in members if member != '']
    return meta, groups
164

    
165

    
166
def put_account_headers(response, meta, groups, policy):
    """Copy account information into the response headers.

    Sets the container count and bytes used when present in ``meta``, the
    Last-Modified date, every 'X-Account-Meta-*' entry, the optional
    until-timestamp, one 'X-Account-Group-*' header per group (members
    joined with commas) and one 'X-Account-Policy-*' header per policy item.
    """
    for meta_key, header in (('count', 'X-Account-Container-Count'),
                             ('bytes', 'X-Account-Bytes-Used')):
        if meta_key in meta:
            response[header] = meta[meta_key]
    response['Last-Modified'] = http_date(int(meta['modified']))

    for key, value in meta.items():
        if key.startswith('X-Account-Meta-'):
            header = smart_str(key, strings_only=True)
            response[header] = smart_str(value, strings_only=True)

    if 'until_timestamp' in meta:
        until = int(meta['until_timestamp'])
        response['X-Account-Until-Timestamp'] = http_date(until)

    for name, members in groups.items():
        name = smart_str(name, strings_only=True)
        header = format_header_key('X-Account-Group-' + name)
        response[header] = smart_str(','.join(members), strings_only=True)

    for name, value in policy.items():
        header = smart_str(format_header_key('X-Account-Policy-' + name),
                           strings_only=True)
        response[header] = smart_str(value, strings_only=True)
185

    
186

    
187
def get_container_headers(request):
    """Extract container metadata and policy from the request.

    Returns a ``(meta, policy)`` tuple: ``meta`` holds the validated
    'X-Container-Meta-*' headers and ``policy`` maps each lower-cased
    policy name to its value with spaces removed.
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)
    policy = {}
    policy_headers = get_header_prefix(request, 'X-Container-Policy-')
    for header, value in policy_headers.items():
        # Drop the 19-character 'X-Container-Policy-' prefix.
        policy[header[19:].lower()] = value.replace(' ', '')
    return meta, policy
192

    
193

    
194
def put_container_headers(request, response, meta, policy):
    """Copy container information into the response headers.

    Sets the object count and bytes used when present in ``meta``, the
    Last-Modified date, every 'X-Container-Meta-*' entry, the names of the
    object metadata keys found in meta['object_meta'], the backend's block
    size and hash algorithm, the optional until-timestamp and one
    'X-Container-Policy-*' header per policy item.
    """
    for meta_key, header in (('count', 'X-Container-Object-Count'),
                             ('bytes', 'X-Container-Bytes-Used')):
        if meta_key in meta:
            response[header] = meta[meta_key]
    response['Last-Modified'] = http_date(int(meta['modified']))

    for key, value in meta.items():
        if key.startswith('X-Container-Meta-'):
            header = smart_str(key, strings_only=True)
            response[header] = smart_str(value, strings_only=True)

    # Report object metadata keys without their 14-character
    # 'X-Object-Meta-' prefix.
    object_meta_names = [
        smart_str(name, strings_only=True)[14:]
        for name in meta['object_meta']
        if name.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(object_meta_names)

    backend = request.backend
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm

    if 'until_timestamp' in meta:
        until = int(meta['until_timestamp'])
        response['X-Container-Until-Timestamp'] = http_date(until)

    for name, value in policy.items():
        header = smart_str(format_header_key('X-Container-Policy-' + name),
                           strings_only=True)
        response[header] = smart_str(value, strings_only=True)
213

    
214

    
215
def get_object_headers(request):
    """Extract object-related information from the request headers.

    Returns a ``(content_type, meta, sharing, public)`` tuple. ``meta``
    holds the validated 'X-Object-Meta-*' headers plus any non-empty
    Content-Encoding, Content-Disposition and X-Object-Manifest values;
    ``sharing`` and ``public`` come from get_sharing() and get_public().
    """
    content_type = request.META.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)
    extra_headers = (
        ('Content-Encoding', 'HTTP_CONTENT_ENCODING'),
        ('Content-Disposition', 'HTTP_CONTENT_DISPOSITION'),
        ('X-Object-Manifest', 'HTTP_X_OBJECT_MANIFEST'),
    )
    for meta_key, environ_key in extra_headers:
        value = request.META.get(environ_key)
        if value:
            meta[meta_key] = value
    sharing = get_sharing(request)
    public = get_public(request)
    return content_type, meta, sharing, public
226

    
227

    
228
def put_object_headers(response, meta, restricted=False, token=None):
    """Copy object information into the response headers.

    ETag, Content-Length, Content-Type and Last-Modified are always set.
    When ``restricted`` is true only Content-Encoding and
    Content-Disposition are added on top of them; otherwise the hash, UUID,
    version details, 'X-Object-Meta-*' entries and the sharing/public
    related headers found in ``meta`` are included as well.
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))

    if restricted:
        passthrough = ('Content-Encoding', 'Content-Disposition')
    else:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        # NOTE(review): the lookup runs even when TRANSLATE_UUIDS is false,
        # in which case its result is unused and no X-Object-Modified-By
        # header is emitted - confirm that this is intended.
        modified_by = retrieve_displayname(token, meta['modified_by'])
        if TRANSLATE_UUIDS:
            response['X-Object-Modified-By'] = smart_str(
                modified_by, strings_only=True)
        response['X-Object-Version'] = meta['version']
        version_timestamp = int(meta['version_timestamp'])
        response['X-Object-Version-Timestamp'] = http_date(version_timestamp)
        for key, value in meta.items():
            if key.startswith('X-Object-Meta-'):
                header = smart_str(key, strings_only=True)
                response[header] = smart_str(value, strings_only=True)
        passthrough = (
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
            'X-Object-Public')

    for key in passthrough:
        if key in meta:
            response[key] = smart_str(meta[key], strings_only=True)
256

    
257

    
258
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The manifest value is split into a container and a name prefix. The
    objects listed under that prefix contribute the sum of their sizes to
    meta['bytes'] and the MD5 of their concatenated checksums to
    meta['checksum']. Any error while collecting them leaves ``meta``
    untouched.
    """

    if 'X-Object-Manifest' not in meta:
        return

    checksums = []
    total_bytes = 0
    try:
        src_container, src_name = split_container_object_string(
            '/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(
            request.user_uniq, v_account,
            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(
                request.user_uniq, v_account, src_container, x[0],
                'pithos', x[1])
            checksums.append(src_meta['checksum'])
            total_bytes += src_meta['bytes']
        etag = ''.join(checksums)
    except Exception:
        # Best effort: ignore errors. Catching Exception rather than using
        # a bare except lets SystemExit and KeyboardInterrupt propagate.
        return
    meta['bytes'] = total_bytes
    md5 = hashlib.md5()
    md5.update(etag)
    meta['checksum'] = md5.hexdigest().lower()
282

    
283
def is_uuid(str):
    """Tell whether ``str`` is the textual form of a UUID.

    Returns True when ``uuid.UUID`` accepts the value and False otherwise.
    Values that are not strings (e.g. None) are reported as not being a
    UUID instead of letting TypeError/AttributeError escape.

    The parameter name shadows the builtin; it is kept for compatibility
    with existing callers.
    """
    try:
        uuid.UUID(str)
    except (ValueError, TypeError, AttributeError):
        return False
    return True
290

    
291
##########################
292
# USER CATALOG utilities #
293
##########################
294

    
295
def retrieve_displayname(token, uuid, fail_silently=True):
    """Look up the display name registered for ``uuid``.

    Returns the display name reported by get_displayname(). When none is
    found, ``uuid`` itself is returned if ``fail_silently`` is true,
    otherwise ItemNotExists is raised.
    """
    displayname = get_displayname(
        token, uuid, USER_CATALOG_URL, AUTHENTICATION_USERS)
    if displayname:
        return displayname
    if not fail_silently:
        raise ItemNotExists(uuid)
    # Nothing found: just return the uuid.
    return uuid
304

    
305
def retrieve_displaynames(token, uuids, return_dict=False, fail_silently=True):
    """Look up the display names of several uuids at once.

    Returns the catalog produced by get_displaynames() when ``return_dict``
    is true; otherwise a list with one entry per item of ``uuids`` (None
    for those missing from the catalog).

    Raises ItemNotExists when ``fail_silently`` is false and some uuids are
    missing from the catalog.
    """
    catalog = get_displaynames(
        token, uuids, USER_CATALOG_URL, AUTHENTICATION_USERS) or {}
    unknown = list(set(uuids) - set(catalog))
    if unknown and not fail_silently:
        raise ItemNotExists('Unknown displaynames: %s' % ', '.join(unknown))
    if return_dict:
        return catalog
    return [catalog.get(item) for item in uuids]
312

    
313
def retrieve_uuid(token, displayname):
    """Return the uuid that corresponds to ``displayname``.

    A value that already is a uuid (see is_uuid()) is returned unchanged.
    Raises ItemNotExists when get_user_uuid() finds nothing.
    """
    if is_uuid(displayname):
        return displayname

    user_uuid = get_user_uuid(
        token, displayname, USER_CATALOG_URL, AUTHENTICATION_USERS)
    if user_uuid:
        return user_uuid
    raise ItemNotExists(displayname)
322

    
323
def retrieve_uuids(token, displaynames, return_dict=False, fail_silently=True):
    """Look up the uuids of several display names at once.

    Returns the catalog produced by get_uuids() when ``return_dict`` is
    true; otherwise a list with one entry per item of ``displaynames``
    (None for those missing from the catalog).

    Raises ItemNotExists when ``fail_silently`` is false and some display
    names are missing from the catalog.
    """
    catalog = get_uuids(
        token, displaynames, USER_CATALOG_URL, AUTHENTICATION_USERS) or {}
    unknown = list(set(displaynames) - set(catalog))
    if unknown and not fail_silently:
        raise ItemNotExists('Unknown uuids: %s' % ', '.join(unknown))
    if return_dict:
        return catalog
    return [catalog.get(item) for item in displaynames]
330

    
331
def replace_permissions_displayname(token, holder):
    """Replace the display name inside a permission holder with its uuid.

    '*' is returned unchanged. A holder of the form 'account:group' has
    only its account part translated; any other holder is translated as a
    whole.
    """
    if holder == '*':
        return holder
    # Check first for a group permission: exactly one ':' separator.
    parts = holder.split(':')
    if len(parts) != 2:
        return retrieve_uuid(token, holder)
    account, group = parts
    return ':'.join([retrieve_uuid(token, account), group])
341

    
342
def replace_permissions_uuid(token, holder):
    """Replace the uuid inside a permission holder with its display name.

    '*' is returned unchanged. A holder of the form 'account:group' has
    only its account part translated; any other holder is translated as a
    whole.
    """
    if holder == '*':
        return holder
    # Check first for a group permission: exactly one ':' separator.
    parts = holder.split(':')
    if len(parts) != 2:
        return retrieve_displayname(token, holder)
    account, group = parts
    return ':'.join([retrieve_displayname(token, account), group])
352

    
353
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add sharing information to an object's metadata.

    ``permissions`` is None or an ``(allowed, perm_path, perms)`` tuple;
    nothing happens when it is None or when ``perms`` is empty. Otherwise
    meta['X-Object-Sharing'] is built from the 'read' and 'write' lists,
    meta['X-Object-Shared-By'] is set when the permissions come from a
    different path and meta['X-Object-Allowed-To'] is set when the
    requesting user is not the account owner.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return

    # Replace uuid with displayname (updates ``perms`` in place).
    if TRANSLATE_UUIDS:
        token = getattr(request, 'token', None)
        for access in ('read', 'write'):
            perms[access] = [replace_permissions_uuid(token, holder)
                             for holder in perms.get(access, [])]

    clauses = []
    for access in ('read', 'write'):
        holders = ','.join(perms.get(access, []))
        if holders:
            clauses.append(access + '=' + holders)
    meta['X-Object-Sharing'] = '; '.join(clauses)

    object_path = '/'.join((v_account, v_container, v_object))
    if object_path != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
382

    
383

    
384
def update_public_meta(public, meta):
    """Set meta['X-Object-Public'] to the object's public URL path.

    Nothing is changed when ``public`` is falsy.
    """
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
388

    
389

    
390
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.

    Raises NotModified when If-Modified-Since parses to a date that is not
    older than meta['modified'], and PreconditionFailed when
    If-Unmodified-Since parses to a date older than it. Headers that do
    not parse are ignored.
    """

    if 'modified' not in meta:
        return  # TODO: Always return?

    since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if since is not None:
        since = parse_http_date_safe(since)
        if since is not None and int(meta['modified']) <= since:
            raise NotModified('Resource has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        unmodified_since = parse_http_date_safe(unmodified_since)
        if (unmodified_since is not None and
                int(meta['modified']) > unmodified_since):
            raise PreconditionFailed('Resource has been modified')
407

    
408

    
409
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    The ETag is meta['checksum']; an empty one means there is no resource.
    Raises PreconditionFailed when If-Match is not satisfied. When
    If-None-Match is matched, raises NotModified for HEAD and GET requests
    and PreconditionFailed for any other method.
    """

    etag = meta['checksum'] or None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            accepted = [x.lower() for x in parse_etags(if_match)]
            if etag not in accepted:
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    # TODO: If this passes, must ignore If-Modified-Since header.
    if if_none_match is None or etag is None:
        return
    if if_none_match != '*':
        rejected = [x.lower() for x in parse_etags(if_none_match)]
        if etag not in rejected:
            return
    # TODO: Continue if an If-Modified-Since header is present.
    if request.method in ('HEAD', 'GET'):
        raise NotModified('Resource ETag matches')
    raise PreconditionFailed('Resource exists or ETag matches')
432

    
433

    
434
def split_container_object_string(s):
    """Split a '/container/object' string into its two parts.

    The split happens at the first '/' after the leading one, so the
    object part may itself contain slashes.

    Raises ValueError when the string is empty, does not start with '/',
    has no second '/' or has nothing after it.
    """
    if not s or s[0] != '/':
        raise ValueError
    container, separator, name = s[1:].partition('/')
    if not separator or not name:
        raise ValueError
    return container, name
442

    
443

    
444
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False, delimiter=None):
    """Copy or move an object.

    Object headers are read from the request; the Content-Type is dropped
    first when the 'ignore_content_type' query parameter is present. The
    backend then moves or copies the object (a copy honours the
    X-Source-Version header) and, if an X-Object-Public header was given,
    the destination's public flag is updated.

    Returns the version id reported by the backend. Backend errors are
    translated to Forbidden, ItemNotFound, BadRequest or
    RequestEntityTooLarge.
    """

    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del request.META['CONTENT_TYPE']
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')

    # Leading arguments shared by the backend's move and copy calls.
    common_args = (
        request.user_uniq, src_account, src_container, src_name,
        dest_account, dest_container, dest_name,
        content_type, 'pithos', meta, False, permissions)
    try:
        if move:
            version_id = request.backend.move_object(
                *(common_args + (delimiter,)))
        else:
            version_id = request.backend.copy_object(
                *(common_args + (src_version, delimiter)))
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (ItemNotExists, VersionNotExists):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError as e:
        raise RequestEntityTooLarge('Quota error: %s' % e)

    if public is None:
        return version_id
    try:
        request.backend.update_object_public(
            request.user_uniq, dest_account, dest_container, dest_name,
            public)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except ItemNotExists:
        raise ItemNotFound('Object does not exist')
    return version_id
478

    
479

    
480
def get_int_parameter(p):
    """Convert a parameter to a non-negative integer.

    Returns None when ``p`` is None, cannot be parsed by ``int()`` or is
    negative; otherwise the integer value.
    """
    if p is None:
        return p
    try:
        value = int(p)
    except ValueError:
        return None
    return value if value >= 0 else None
489

    
490

    
491
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises LengthRequired when the header is missing or is not accepted by
    get_int_parameter().
    """
    raw_length = request.META.get('CONTENT_LENGTH')
    content_length = get_int_parameter(raw_length)
    if content_length is not None:
        return content_length
    raise LengthRequired('Missing or invalid Content-Length header')
496

    
497

    
498
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    Three forms are understood for each comma-separated item: 'a-b'
    (bytes a through b inclusive), 'a-' (from a up to ``size``) and '-n'
    (the last n bytes). The results are not clamped to ``size``.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compiled once, outside the loop; raw string so that '\d' is not
    # treated as a string escape.
    spec_re = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')

    ret = []
    for spec in (x.strip() for x in ranges[6:].split(',')):
        m = spec_re.match(spec)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset == '':
            # Suffix form: the last 'upto' bytes.
            length = int(upto)
            ret.append((size - length, length))
            continue

        offset = int(offset)
        if upto == '':
            # Open-ended form: everything from 'offset' on.
            ret.append((offset, size - offset))
            continue

        upto = int(upto)
        if offset > upto:
            return None
        ret.append((offset, upto - offset + 1))

    return ret
534

    
535

    
536
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None
    # The pattern below requires a numeric offset, so it can never match
    # the wildcard form; handle that one up front.
    if header == 'bytes */*':
        return (None, None, None)

    m = re.match(
        r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if m is None:
        return None

    offset = int(m.group('offset'))
    upto = int(m.group('upto')) if m.group('upto') != '' else None
    total = int(m.group('total')) if m.group('total') != '*' else None

    # Reject ranges that are reversed or reach beyond the declared total.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
575

    
576

    
577
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Returns None when the header is absent and an empty dict when it is
    blank. Otherwise the ';'-separated 'read=...' and 'write=...' clauses
    are turned into a dict of lower-cased holder lists, where a '*' holder
    replaces the whole list. Holders present in both lists are kept only
    in the write list.

    Raises BadRequest on error.
    """

    header = request.META.get('HTTP_X_OBJECT_SHARING')
    if header is None:
        return None

    # TODO: Document or remove '~' replacing.
    header = header.replace('~', '').replace(' ', '')

    ret = {}
    if header == '':
        return ret

    for clause in header.split(';'):
        # Find out which kind of access this clause grants.
        for access in ('read', 'write'):
            marker = access + '='
            if clause.startswith(marker):
                break
        else:
            raise BadRequest(
                'Bad X-Object-Sharing header value: missing prefix')

        holders = list(set(
            v.replace(' ', '').lower()
            for v in clause[len(marker):].split(',')))
        if '' in holders:
            holders.remove('')
        if '*' in holders:
            holders = ['*']
        ret[access] = holders
        if len(holders) == 0:
            raise BadRequest(
                'Bad X-Object-Sharing header value: invalid length')

    # Replace displayname with uuid.
    if TRANSLATE_UUIDS:
        token = getattr(request, 'token', None)
        try:
            for access in ('read', 'write'):
                ret[access] = [replace_permissions_displayname(token, x)
                               for x in ret.get(access, [])]
        except ItemNotExists as e:
            raise BadRequest(
                'Bad X-Object-Sharing header value: unknown account: %s' % e)

    # Keep duplicates only in write list.
    writers = ret.get('write', [])
    dups = [x for x in ret.get('read', []) if x in writers and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del ret['read']

    return ret
642

    
643

    
644
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None when the header is absent, True for 'true' and False for
    'false' or an empty value (spaces and letter case are ignored).

    Raises BadRequest on error.
    """

    header = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if header is None:
        return None

    recognized = {'true': True, 'false': False, '': False}
    normalized = header.replace(' ', '').lower()
    if normalized in recognized:
        return recognized[normalized]
    raise BadRequest('Bad X-Object-Public header value')
660

    
661

    
662
def raw_input_socket(request):
    """Return the socket for reading the rest of the request.

    Under mod_python this is ``request._req``; otherwise it is the
    'wsgi.input' entry of the request environment. Raises NotImplemented
    (the fault imported by this module) when neither is available.
    """

    software = request.META.get('SERVER_SOFTWARE')
    running_mod_python = bool(software) and software.startswith('mod_python')
    if running_mod_python:
        return request._req
    if 'wsgi.input' not in request.environ:
        raise NotImplemented('Unknown server software')
    return request.environ['wsgi.input']
671

    
672
# Size limit, in bytes, that socket_read_iterator() checks uploads against.
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB
673

    
674

    
675
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Raises BadRequest when the size limit is exceeded, when a chunk size
    cannot be parsed or when the input ends before the announced data has
    been received.
    """

    sock = raw_input_socket(request)

    if length >= 0:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
        return

    # Chunked transfers. 'length' is negative here and cannot double as a
    # byte counter, so the amount received is tracked separately.
    received = 0

    # Small version (server does the dechunking).
    server_software = request.META.get('SERVER_SOFTWARE') or ''
    if (request.environ.get('mod_wsgi.input_chunked', None) or
            server_software.startswith('gunicorn')):
        while True:
            data = sock.read(blocksize)
            if not data:
                return
            received += len(data)
            if received > MAX_UPLOAD_SIZE:
                raise BadRequest('Maximum size is reached')
            yield data

    # Long version (do the dechunking).
    data = ''
    while True:
        # Get chunk size.
        if hasattr(sock, 'readline'):
            size_line = sock.readline()
        else:
            size_line = ''
            while size_line[-1:] != '\n':
                char = sock.read(1)
                if not char:
                    # Input ended early; the parsing below rejects
                    # whatever is unusable.
                    break
                size_line += char
        # Ignore chunk extensions; int() tolerates surrounding whitespace.
        size_line = size_line.split(';', 1)[0]
        try:
            chunk_length = int(size_line, 16)
        except (TypeError, ValueError):
            # TODO: Change to something more appropriate.
            raise BadRequest('Bad chunk size')
        if chunk_length < 0:
            raise BadRequest('Bad chunk size')
        # Check if done.
        if chunk_length == 0:
            if data:
                yield data
            return
        received += chunk_length
        if received > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        # Get the actual data.
        while chunk_length > 0:
            chunk = sock.read(min(chunk_length, blocksize))
            if not chunk:
                # Without this check a truncated body would loop forever.
                raise BadRequest('Incomplete chunk')
            chunk_length -= len(chunk)
            data += chunk
            if len(data) >= blocksize:
                yield data[:blocksize]
                data = data[blocksize:]
        sock.read(2)  # CRLF
739

    
740

    
741
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Uploaded data is cut into blocks of the backend's block size. Each
    block is stored through the backend as it becomes available, its hash
    is appended to the file's hashmap and an MD5 of the whole content is
    kept as the file's etag.
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store the first ``length`` buffered bytes as one block.

        Does nothing when fewer than ``length`` bytes are buffered.
        """
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the buffer, the digest and the file object for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(
            name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer a chunk of uploaded data and store every complete block."""
        self.data += raw_data
        self.file.size += len(raw_data)
        block_size = self.backend.block_size
        # A chunk may carry more than one block's worth of data; flushing
        # only once would let the remainder pile up and end up stored as a
        # single oversized block by file_complete().
        while len(self.data) >= block_size:
            self.put_data(block_size)
        # Nothing is returned, so no data is handed back to the caller.
        return None

    def file_complete(self, file_size):
        """Store the trailing partial block and return the uploaded file."""
        remaining = len(self.data)
        if remaining > 0:
            self.put_data(remaining)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
775

    
776

    
777
class ObjectWrapper(object):
    """Iterate over an object's data, at most one block per iteration.

    The data is read using the (offset, length) pairs of the range list.
    With a single range only data is produced; with several ranges each one
    is preceded by a part header built from the boundary, and a closing
    footer follows the last range.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        # Read position and the most recently fetched block.
        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        # -1 means that no part header has been produced yet.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of the current range.

        Raises StopIteration when nothing is left to read in the range.
        """
        if self.length <= 0:
            raise StopIteration

        block_size = self.backend.block_size

        # Move to the file that contains the current offset.
        current_size = self.sizes[self.file_index]
        while self.offset >= current_size:
            self.offset -= current_size
            self.file_index += 1
            current_size = self.sizes[self.file_index]

        # Fetch the block for the current position, unless it is the one
        # that was fetched last.
        file_hashes = self.hashmaps[self.file_index]
        self.block_index = int(self.offset / block_size)
        wanted = file_hashes[self.block_index]
        if self.block_hash != wanted:
            self.block_hash = wanted
            try:
                self.block = self.backend.get_block(wanted)
            except ItemNotExists:
                raise ItemNotFound('Block does not exist')

        # Slice the data out of the block. The last block of a file only
        # counts up to the file's size.
        start = self.offset % block_size
        usable = block_size
        tail = current_size % block_size
        if self.block_index == len(file_hashes) - 1 and tail:
            usable = tail
        count = min(self.length, usable - start)
        piece = self.block[start:start + count]
        self.offset += count
        self.length -= count
        return piece

    def next(self):
        """Return the next piece of output: data, a part header or the footer."""
        total = len(self.ranges)
        if total == 1:
            return self.part_iterator()
        if self.range_index == total:
            raise StopIteration

        if self.range_index != -1:
            try:
                return self.part_iterator()
            except StopIteration:
                # Current range is exhausted; continue with the next one.
                pass

        self.range_index += 1
        if self.range_index >= total:
            # Footer.
            return '\r\n'.join(['', '--' + self.boundary + '--', ''])

        # Part header.
        self.offset, self.length = self.ranges[self.range_index]
        self.file_index = 0
        lines = []
        if self.range_index > 0:
            lines.append('')
        lines.extend([
            '--' + self.boundary,
            'Content-Range: bytes %d-%d/%d' % (
                self.offset, self.offset + self.length - 1, self.size),
            'Content-Transfer-Encoding: binary',
            '',
            '',
        ])
        return '\r\n'.join(lines)
866

    
867

    
868
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Build the HttpResponse that replies with the object's data.

    The whole object is returned with status 200 when no range is requested
    or when the If-Range header does not match the object; otherwise the
    requested range(s) are returned with status 206. Raises
    RangeNotSatisfiable when a requested range falls outside the object.
    """

    # Range handling.
    size = sum(sizes)
    status = 200
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
    else:
        out_of_bounds = any(
            length <= 0 or length > size or
            offset < 0 or offset >= size or
            offset + length > size
            for offset, length in ranges)
        if out_of_bounds:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        status = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # Modification time has passed instead.
                matches = parse_http_date(if_range) == meta['modified']
            except ValueError:
                matches = if_range == meta['checksum']
            if not matches:
                ranges = [(0, size)]
                status = 200

    # A boundary is only needed for a multipart reply.
    boundary = ''
    if status == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex

    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=status)
    put_object_headers(
        response, meta, restricted=public,
        token=getattr(request, 'token', None))

    if status != 206:
        return response

    if len(ranges) == 1:
        offset, length = ranges[0]
        # Update with the correct length.
        response['Content-Length'] = length
        response['Content-Range'] = 'bytes %d-%d/%d' % (
            offset, offset + length - 1, size)
    else:
        del(response['Content-Length'])
        response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
            boundary,)
    return response
918

    
919

    
920
def put_object_block(request, hashmap, data, offset):
    """Write one block's worth of `data` at `offset`, updating `hashmap`.

    An existing block is updated in place; past the end of the hashmap a new
    block is appended, zero-filled up to the position inside the block.
    Returns the amount of data written, which never crosses a block boundary.
    """

    backend = request.backend
    block_size = backend.block_size
    index = int(offset / block_size)
    start = offset % block_size
    written = min(len(data), block_size - start)
    chunk = data[:written]
    if index < len(hashmap):
        hashmap[index] = backend.update_block(hashmap[index], chunk, start)
    else:
        hashmap.append(backend.put_block(('\x00' * start) + chunk))
    return written
931

    
932

    
933
def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    `hashmap` is the ordered list of block hashes and `size` the total
    length of the data. Returns the lowercase hex digest.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    tail = size % backend.block_size
    last = len(hashmap) - 1
    for index, block_hash in enumerate(hashmap):
        data = backend.get_block(block_hash)  # Blocks come in padded.
        # Only a partially filled last block needs its padding removed.
        # When the size is an exact multiple of the block size the last
        # block is full and must be hashed whole, not cut to zero bytes.
        if index == last and tail:
            data = data[:tail]
        md5.update(data)
    return md5.hexdigest().lower()
945

    
946

    
947
def simple_list_response(request, l):
    """Serialize the list `l` according to `request.serialization`.

    Handles 'text' (one item per line), 'xml' (the items.xml template) and
    'json'. Any other serialization yields None.
    """
    serialization = request.serialization
    if serialization == 'text':
        return '\n'.join(l) + '\n'
    elif serialization == 'xml':
        return render_to_string('items.xml', {'items': l})
    elif serialization == 'json':
        return json.dumps(l)
    return None
954

    
955

    
956
from pithos.backends.util import PithosBackendPool

# Passed to the pool below as its size.
POOL_SIZE = 5

# The RADOS pool names are only looked up when RADOS storage is enabled.
BLOCK_PARAMS = {
    'mappool': RADOS_POOL_MAPS if RADOS_STORAGE else None,
    'blockpool': RADOS_POOL_BLOCKS if RADOS_STORAGE else None,
}

# Module-wide pool from which get_backend() draws its backends.
_pithos_backend_pool = PithosBackendPool(
    size=POOL_SIZE,
    db_module=BACKEND_DB_MODULE,
    db_connection=BACKEND_DB_CONNECTION,
    block_module=BACKEND_BLOCK_MODULE,
    block_path=BACKEND_BLOCK_PATH,
    block_umask=BACKEND_BLOCK_UMASK,
    queue_module=BACKEND_QUEUE_MODULE,
    queue_hosts=BACKEND_QUEUE_HOSTS,
    queue_exchange=BACKEND_QUEUE_EXCHANGE,
    quotaholder_url=QUOTAHOLDER_URL,
    quotaholder_token=QUOTAHOLDER_TOKEN,
    free_versioning=BACKEND_FREE_VERSIONING,
    block_params=BLOCK_PARAMS)
981

    
982
def get_backend():
    """Take a backend from the pool and prepare it for use.

    The default quota and versioning policy are set from the configuration
    and the backend's message list is emptied.
    """
    backend = _pithos_backend_pool.pool_get()
    policy = backend.default_policy
    policy['quota'] = BACKEND_QUOTA
    policy['versioning'] = BACKEND_VERSIONING
    backend.messages = []
    return backend
988

    
989

    
990
def update_request_headers(request):
    """Validate the HTTP_* entries of request.META and URL-decode them.

    Raises BadRequest when a header name or value is not plain ASCII.
    Entries containing '%' are replaced by their unquoted form.
    """
    # Work on a snapshot, since request.META is modified inside the loop.
    http_meta = dict(
        (k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_'))
    for name, value in http_meta.iteritems():
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' not in name and '%' not in value:
            continue
        # Handle URL-encoded keys and values.
        del(request.META[name])
        decoded = smart_unicode(unquote(value), strings_only=True)
        request.META[unquote(name)] = decoded
1004

    
1005

    
1006
def update_response_headers(request, response):
    """Finalize the Content-Type, Content-Length and header encoding.

    The content type follows request.serialization ('xml' or 'json'), with
    text/plain as the fallback when none is set. Content-Length is filled in
    when missing, except for multipart/byteranges responses. Finally the
    X-Account-*, X-Container-*, X-Object-* and Content-* headers are
    URL-encoded.
    """
    serialization = request.serialization
    if serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if not response.has_header('Content-Length'):
        multipart = (
            response.has_header('Content-Type') and
            response['Content-Type'].startswith('multipart/byteranges'))
        if not multipart:
            response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    encoded_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    headers = response.items()
    for name, value in headers:
        if name.startswith(encoded_prefixes):
            del(response[name])
            response[quote(name)] = quote(value, safe='/=,:@; ')
1026

    
1027

    
1028
def render_fault(request, fault):
    """Return a plain-text HttpResponse describing `fault`.

    The body is the fault message, followed by the fault details when there
    are any; the status is `fault.code`. For an InternalServerError in DEBUG
    mode the details are replaced by the formatted traceback of the exception
    currently being handled. As a side effect request.serialization is set
    to 'text'.
    """
    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # format_exc() takes an optional traceback depth limit, not an
        # exception; it always formats the exception being handled.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
1039

    
1040

    
1041
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    The result is always 'text' unless 'format_allowed' is True, in which
    case 'json' or 'xml' may be returned as well. The 'format' query
    parameter is checked first, then the entries of the Accept header in
    the order they are listed.
    """

    if not format_allowed:
        return 'text'

    requested = request.GET.get('format')
    if requested in ('json', 'xml'):
        return requested

    accept_header = request.META.get('HTTP_ACCEPT', '')
    for entry in accept_header.split(','):
        # Ignore any parameters that follow the media type.
        media_type = entry.strip().partition(';')[0]
        if media_type == 'application/json':
            return 'json'
        if media_type in ('application/xml', 'text/xml'):
            return 'xml'

    return 'text'
1064

    
1065
def get_pithos_usage(usage):
    """Return the first entry named 'pithos+.diskspace', or None if absent."""
    matching = (
        entry for entry in usage if entry.get('name') == 'pithos+.diskspace')
    return next(matching, None)
1069

    
1070
def api_method(http_method=None, format_allowed=False, user_required=True,
               request_usage=False):
    """Decorator function for views that implement an API method.

    The wrapped view runs after the request method has been checked,
    the user authenticated (when `user_required`), the container and object
    names length-checked, the headers normalized, and `request.serialization`
    and `request.backend` set. Faults and unexpected errors are rendered as
    responses, and the backend is closed in every case.
    """

    def authenticate(request):
        # Identify the user and record usage and token on the request.
        token = None
        cookie_allowed = request.method in ('HEAD', 'GET')
        if cookie_allowed and COOKIE_NAME in request.COOKIES:
            cookie_value = unquote(request.COOKIES.get(COOKIE_NAME, ''))
            # The cookie value has the form '<account>|<token>'.
            token = cookie_value.partition('|')[2]
        get_user(request,
                 AUTHENTICATION_URL,
                 AUTHENTICATION_USERS,
                 token,
                 user_required)
        if getattr(request, 'user', None) is None:
            raise Unauthorized('Access denied')
        assert getattr(request, 'user_uniq', None) != None
        usage = request.user.get('usage', [])
        request.user_usage = get_pithos_usage(usage)
        header_token = request.META.get('HTTP_X_AUTH_TOKEN', token)
        request.token = request.GET.get('X-Auth-Token', header_token)

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                if user_required:
                    authenticate(request)

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(
                    request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                if fault.code >= 500:
                    logger.exception("API Fault")
                return render_fault(request, fault)
            except BaseException as e:
                logger.exception('Unexpected error: %s' % e)
                fault = InternalServerError('Unexpected error: %s' % e)
                return render_fault(request, fault)
            finally:
                # The backend is only set once the checks above have passed.
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator