Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 4a9e3f32

History | View | Annotate | Download (41.1 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52
from synnefo.lib.astakos import get_user
53

    
54
from pithos.api.faults import (
55
    Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
56
    Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
57
    RangeNotSatisfiable, InternalServerError, NotImplemented)
58
from pithos.api.short_url import encode_url
59
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
60
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
61
                                 BACKEND_BLOCK_UMASK,
62
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
63
                                 BACKEND_QUEUE_EXCHANGE,
64
                                 QUOTAHOLDER_URL, QUOTAHOLDER_TOKEN,
65
                                 BACKEND_QUOTA, BACKEND_VERSIONING,
66
                                 BACKEND_FREE_VERSIONING,
67
                                 AUTHENTICATION_URL, AUTHENTICATION_USERS,
68
                                 COOKIE_NAME, USER_CATALOG_URL,
69
                                 RADOS_STORAGE, RADOS_POOL_BLOCKS,
70
                                 RADOS_POOL_MAPS, TRANSLATE_UUIDS)
71
from pithos.backends import connect_backend
72
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
73
                                  VersionNotExists)
74
from synnefo.lib.astakos import (get_user_uuid, get_displayname,
75
                                 get_uuids, get_displaynames)
76

    
77
import logging
78
import re
79
import hashlib
80
import uuid
81
import decimal
82

    
83
logger = logging.getLogger(__name__)
84

    
85

    
86
class UTC(tzinfo):
    """Fixed-offset ``tzinfo`` implementation for UTC."""

    # UTC has neither an offset from itself nor a daylight saving shift.
    _ZERO = timedelta(0)

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return self._ZERO

    def tzname(self, dt):
        """Return the name of the timezone."""
        return 'UTC'

    def dst(self, dt):
        """Return the daylight saving adjustment, which is always zero."""
        return self._ZERO
95

    
96

    
97
def json_encode_decimal(obj):
    """Return the string form of a ``decimal.Decimal`` value.

    Raises TypeError for any object that is not a Decimal.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
101

    
102

    
103
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone.

    The tzinfo of `d` is replaced with UTC (no conversion takes place), so
    the date/time fields of `d` are emitted unchanged with a UTC offset.
    """

    utc_stamped = d.replace(tzinfo=UTC())
    return utc_stamped.isoformat()
107

    
108

    
109
def rename_meta_key(d, old, new):
    """Move the value stored under key `old` to key `new`, in place.

    Does nothing when `old` is not present in `d`. An existing value under
    `new` is overwritten.
    """
    if old not in d:
        return
    # pop() first, then assign: renaming a key to itself keeps the entry
    # instead of deleting it.
    d[new] = d.pop(old)
114

    
115

    
116
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp as an ISO8601 string in UTC.

    Note that a truthy ``d['last_modified']`` is also rewritten in place in
    `d`; the returned dictionary is a new object.
    """

    if d.get('last_modified'):
        # isoformat() labels the value as UTC without converting it, so the
        # timestamp must be expanded as UTC rather than as local time.
        d['last_modified'] = isoformat(
            datetime.utcfromtimestamp(d['last_modified']))
    return dict((k.lower().replace('-', '_'), v) for k, v in d.items())
127

    
128

    
129
def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    words = k.replace('_', '-').split('-')
    return '-'.join(word.capitalize() for word in words)
132

    
133

    
134
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict.

    Keys are reformatted with format_header_key(); only headers whose name is
    strictly longer than the prefix are included.
    """

    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for name, value in request.META.items():
        if not name.startswith(meta_prefix) or len(name) <= len(meta_prefix):
            continue
        # name[5:] drops the leading 'HTTP_'.
        # TODO: Document or remove '~' replacing.
        headers[format_header_key(name[5:])] = value.replace('~', '')
    return headers
140

    
141

    
142
def check_meta_headers(meta):
    """Validate the size of a metadata dictionary.

    Raises BadRequest when there are more than 90 entries, when a name is
    longer than 128 characters or when a value is longer than 256.
    """
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for name in meta:
        if len(name) > 128:
            raise BadRequest('Header name too large.')
        if len(meta[name]) > 256:
            raise BadRequest('Header value too large.')
150

    
151

    
152
def get_account_headers(request):
    """Extract account metadata and groups from the request headers.

    Returns a (meta, groups) tuple: `meta` holds the X-Account-Meta-*
    headers and `groups` maps each lower-cased group name to the list of its
    non-empty, comma-separated members.
    Raises BadRequest on oversized metadata or bad group names.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)

    groups = {}
    group_headers = get_header_prefix(request, 'X-Account-Group-')
    for header, members in group_headers.items():
        # Strip the 16-character 'X-Account-Group-' prefix.
        name = header[16:].lower()
        if '-' in name or '_' in name:
            raise BadRequest('Bad characters in group name')
        groups[name] = [m for m in members.replace(' ', '').split(',')
                        if m != '']
    return meta, groups
164

    
165

    
166
def put_account_headers(response, meta, groups, policy):
    """Store account information as headers on the response.

    Copies counters, timestamps and X-Account-Meta-* entries from `meta`,
    one X-Account-Group-* header per entry of `groups` and one
    X-Account-Policy-* header per entry of `policy`.
    """
    for key, header in (('count', 'X-Account-Container-Count'),
                        ('bytes', 'X-Account-Bytes-Used')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))

    for key in meta:
        if not key.startswith('X-Account-Meta-'):
            continue
        response[smart_str(key, strings_only=True)] = smart_str(
            meta[key], strings_only=True)

    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(
            int(meta['until_timestamp']))

    for name, members in groups.items():
        header = format_header_key(
            'X-Account-Group-' + smart_str(name, strings_only=True))
        response[header] = smart_str(','.join(members), strings_only=True)

    for name, value in policy.items():
        header = smart_str(format_header_key('X-Account-Policy-' + name),
                           strings_only=True)
        response[header] = smart_str(value, strings_only=True)
185

    
186

    
187
def get_container_headers(request):
    """Extract container metadata and policy from the request headers.

    Returns a (meta, policy) tuple. Policy keys are the lower-cased header
    names without the 'X-Container-Policy-' prefix; spaces are removed from
    the policy values.
    Raises BadRequest on oversized metadata.
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)

    policy_headers = get_header_prefix(request, 'X-Container-Policy-')
    # Strip the 19-character 'X-Container-Policy-' prefix from each name.
    policy = dict((name[19:].lower(), value.replace(' ', ''))
                  for name, value in policy_headers.items())
    return meta, policy
192

    
193

    
194
def put_container_headers(request, response, meta, policy):
    """Store container information as headers on the response.

    Copies counters, timestamps and X-Container-Meta-* entries from `meta`,
    lists the object meta keys found in ``meta['object_meta']``, reports the
    backend block size and hash algorithm and emits one X-Container-Policy-*
    header per entry of `policy`.
    """
    for key, header in (('count', 'X-Container-Object-Count'),
                        ('bytes', 'X-Container-Bytes-Used')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))

    for key in meta:
        if not key.startswith('X-Container-Meta-'):
            continue
        response[smart_str(key, strings_only=True)] = smart_str(
            meta[key], strings_only=True)

    # Report the object meta keys without their 'X-Object-Meta-' prefix
    # (14 characters).
    object_meta_keys = [smart_str(key, strings_only=True)[14:]
                        for key in meta['object_meta']
                        if key.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(object_meta_keys)

    backend = request.backend
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm

    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(
            int(meta['until_timestamp']))

    for name, value in policy.items():
        header = smart_str(format_header_key('X-Container-Policy-' + name),
                           strings_only=True)
        response[header] = smart_str(value, strings_only=True)
213

    
214

    
215
def get_object_headers(request):
    """Extract object-related information from the request headers.

    Returns a (content_type, meta, sharing, public) tuple, where `meta`
    holds the X-Object-Meta-* headers plus any non-empty Content-Encoding,
    Content-Disposition and X-Object-Manifest values, and the last two items
    come from get_sharing() and get_public().
    Raises BadRequest on oversized metadata.
    """
    content_type = request.META.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)

    extra_headers = (
        ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
        ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'),
    )
    for environ_key, meta_key in extra_headers:
        value = request.META.get(environ_key)
        if value:
            meta[meta_key] = value

    return content_type, meta, get_sharing(request), get_public(request)
226

    
227

    
228
def put_object_headers(response, meta, restricted=False, token=None):
    """Store object information as headers on the response.

    ETag, Content-Length, Content-Type and Last-Modified are always set.
    When `restricted` is true only Content-Encoding and Content-Disposition
    are added on top of those; otherwise the hash, UUID, version details,
    X-Object-Meta-* entries and the sharing/public headers found in `meta`
    are emitted as well.

    `token` is passed to the user catalog when TRANSLATE_UUIDS is enabled,
    in order to report the modifier's displayname.
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))

    if restricted:
        optional = ('Content-Encoding', 'Content-Disposition')
    else:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        if TRANSLATE_UUIDS:
            # Query the user catalog only when the result is actually used.
            modified_by = retrieve_displayname(token, meta['modified_by'])
            response['X-Object-Modified-By'] = smart_str(
                modified_by, strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(
            int(meta['version_timestamp']))
        for k in meta:
            if k.startswith('X-Object-Meta-'):
                response[smart_str(k, strings_only=True)] = smart_str(
                    meta[k], strings_only=True)
        optional = (
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
            'X-Object-Public')

    for k in optional:
        if k in meta:
            response[k] = smart_str(meta[k], strings_only=True)
256

    
257

    
258
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The manifest names a container and an object prefix. `meta['bytes']`
    becomes the sum of the sizes of the matching objects and
    `meta['checksum']` the MD5 of their concatenated checksums.
    Any error while collecting the parts leaves `meta` untouched.
    """

    if 'X-Object-Manifest' not in meta:
        return

    checksums = []
    total_bytes = 0
    try:
        src_container, src_name = split_container_object_string(
            '/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(
            request.user_uniq, v_account,
            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(
                request.user_uniq, v_account, src_container, x[0],
                'pithos', x[1])
            checksums.append(src_meta['checksum'])
            total_bytes += src_meta['bytes']
    except Exception:
        # Best effort: ignore errors, but let KeyboardInterrupt and
        # SystemExit propagate (a bare except would swallow them too).
        return

    meta['bytes'] = total_bytes
    md5 = hashlib.md5()
    md5.update(''.join(checksums))
    meta['checksum'] = md5.hexdigest().lower()
282

    
283
def is_uuid(str):
    """Return True if `str` can be parsed as a UUID, False otherwise.

    None and values that are not strings yield False instead of raising.
    The parameter name shadows the builtin; it is kept for backward
    compatibility with existing callers.
    """
    if str is None:
        return False
    try:
        uuid.UUID(str)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed string; the others: not a string at all.
        return False
    return True
292

    
293
##########################
294
# USER CATALOG utilities #
295
##########################
296

    
297
def retrieve_displayname(token, uuid, fail_silently=True):
    """Look up the displayname that corresponds to `uuid`.

    When no displayname is found, return `uuid` itself if `fail_silently`
    is true, otherwise raise ItemNotExists.
    """
    displayname = get_displayname(
        token, uuid, USER_CATALOG_URL, AUTHENTICATION_USERS)
    if displayname:
        return displayname
    if not fail_silently:
        raise ItemNotExists(uuid)
    # just return the uuid
    return uuid
306

    
307
def retrieve_displaynames(token, uuids, return_dict=False, fail_silently=True):
    """Look up the displaynames that correspond to `uuids`.

    Returns the catalog dictionary when `return_dict` is true; otherwise a
    list aligned with `uuids`, holding None for every uuid missing from the
    catalog. Raises ItemNotExists for missing entries unless
    `fail_silently` is true.
    """
    catalog = get_displaynames(
        token, uuids, USER_CATALOG_URL, AUTHENTICATION_USERS) or {}
    unknown = set(uuids) - set(catalog)
    if unknown and not fail_silently:
        raise ItemNotExists('Unknown displaynames: %s' % ', '.join(unknown))
    if return_dict:
        return catalog
    return [catalog.get(key) for key in uuids]
314

    
315
def retrieve_uuid(token, displayname):
    """Look up the uuid that corresponds to `displayname`.

    A value that already parses as a UUID is returned unchanged.
    Raises ItemNotExists when the lookup yields nothing.
    """
    if is_uuid(displayname):
        return displayname

    user_uuid = get_user_uuid(
        token, displayname, USER_CATALOG_URL, AUTHENTICATION_USERS)
    if user_uuid:
        return user_uuid
    raise ItemNotExists(displayname)
324

    
325
def retrieve_uuids(token, displaynames, return_dict=False, fail_silently=True):
    """Look up the uuids that correspond to `displaynames`.

    Returns the catalog dictionary when `return_dict` is true; otherwise a
    list aligned with `displaynames`, holding None for every displayname
    missing from the catalog. Raises ItemNotExists for missing entries
    unless `fail_silently` is true.
    """
    catalog = get_uuids(
        token, displaynames, USER_CATALOG_URL, AUTHENTICATION_USERS) or {}
    unknown = set(displaynames) - set(catalog)
    if unknown and not fail_silently:
        raise ItemNotExists('Unknown uuids: %s' % ', '.join(unknown))
    if return_dict:
        return catalog
    return [catalog.get(key) for key in displaynames]
332

    
333
def replace_permissions_displayname(token, holder):
    """Replace the displayname in a permission holder with its uuid.

    `holder` is '*', an account, or an 'account:group' pair; '*' and the
    group part are returned untouched.
    """
    if holder == '*':
        return holder
    # check first for a group permission
    account, separator, group = holder.partition(':')
    if not separator:
        return retrieve_uuid(token, holder)
    return ':'.join([retrieve_uuid(token, account), group])
343

    
344
def replace_permissions_uuid(token, holder):
    """Replace the uuid in a permission holder with its displayname.

    `holder` is '*', an account, or an 'account:group' pair; '*' and the
    group part are returned untouched.
    """
    if holder == '*':
        return holder
    # check first for a group permission
    account, separator, group = holder.partition(':')
    if not separator:
        return retrieve_displayname(token, holder)
    return ':'.join([retrieve_displayname(token, account), group])
354

    
355
def update_sharing_meta(request, permissions, v_account, v_container,
                        v_object, meta):
    """Add the sharing related entries to an object's metadata.

    `permissions` is None or an (allowed, perm_path, perms) tuple. Nothing
    happens when it is None or `perms` is empty. Otherwise
    'X-Object-Sharing' is set, 'X-Object-Shared-By' is added when the
    permissions originate from a different path and 'X-Object-Allowed-To'
    when the requesting user is not the account owner.

    When TRANSLATE_UUIDS is enabled the 'read' and 'write' lists of `perms`
    are rewritten in place with displaynames.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if not perms:
        return

    # replace uuid with displayname
    if TRANSLATE_UUIDS:
        token = getattr(request, 'token', None)
        for access in ('read', 'write'):
            perms[access] = [replace_permissions_uuid(token, holder)
                             for holder in perms.get(access, [])]

    parts = []
    for access, label in (('read', 'read='), ('write', 'write=')):
        holders = ','.join(perms.get(access, []))
        if holders:
            parts.append(label + holders)
    meta['X-Object-Sharing'] = '; '.join(parts)

    own_path = '/'.join((v_account, v_container, v_object))
    if own_path != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
384

    
385

    
386
def update_public_meta(public, meta):
    """Set 'X-Object-Public' in `meta` when `public` is truthy.

    The value is '/public/' followed by the encode_url() form of `public`.
    """
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
390

    
391

    
392
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.

    Raises NotModified when If-Modified-Since is not older than the
    resource and PreconditionFailed when the resource is newer than
    If-Unmodified-Since. Headers that cannot be parsed are ignored.
    """

    if 'modified' not in meta:
        return  # TODO: Always return?

    since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if since is not None:
        since = parse_http_date_safe(since)
        if since is not None and int(meta['modified']) <= since:
            raise NotModified('Resource has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        unmodified_since = parse_http_date_safe(unmodified_since)
        if (unmodified_since is not None and
                int(meta['modified']) > unmodified_since):
            raise PreconditionFailed('Resource has been modified')
409

    
410

    
411
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    An empty ``meta['checksum']`` is treated as a non-existing resource.
    Raises PreconditionFailed when If-Match fails, and NotModified (for
    HEAD/GET) or PreconditionFailed (other methods) when If-None-Match
    matches.
    """

    etag = meta['checksum'] or None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            accepted = [x.lower() for x in parse_etags(if_match)]
            if etag not in accepted:
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    # TODO: If this passes, must ignore If-Modified-Since header.
    if if_none_match is None or etag is None:
        return
    if (if_none_match == '*' or
            etag in [x.lower() for x in parse_etags(if_none_match)]):
        # TODO: Continue if an If-Modified-Since header is present.
        if request.method in ('HEAD', 'GET'):
            raise NotModified('Resource ETag matches')
        raise PreconditionFailed('Resource exists or ETag matches')
434

    
435

    
436
def split_container_object_string(s):
    """Split a '/container/object' string into (container, object).

    Only the first slash after the leading one separates the two parts, so
    the object name may contain slashes. Raises ValueError when the leading
    slash, the separator or the object name is missing.
    """
    if not s.startswith('/'):
        raise ValueError
    container, separator, name = s[1:].partition('/')
    if not separator or not name:
        raise ValueError
    return container, name
444

    
445

    
446
def copy_or_move_object(request, src_account, src_container, src_name,
                        dest_account, dest_container, dest_name,
                        move=False, delimiter=None):
    """Copy or move an object.

    Metadata, sharing and public settings for the destination are read from
    the request headers. Returns the version id reported by the backend.
    Backend errors are translated to Forbidden, ItemNotFound, BadRequest or
    RequestEntityTooLarge.
    """

    if 'ignore_content_type' in request.GET:
        request.META.pop('CONTENT_TYPE', None)
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')

    backend = request.backend
    user = request.user_uniq
    try:
        if move:
            version_id = backend.move_object(
                user, src_account, src_container, src_name,
                dest_account, dest_container, dest_name,
                content_type, 'pithos', meta, False, permissions, delimiter)
        else:
            version_id = backend.copy_object(
                user, src_account, src_container, src_name,
                dest_account, dest_container, dest_name,
                content_type, 'pithos', meta, False, permissions,
                src_version, delimiter)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (ItemNotExists, VersionNotExists):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError as e:
        raise RequestEntityTooLarge('Quota error: %s' % e)

    if public is None:
        return version_id
    try:
        backend.update_object_public(
            user, dest_account, dest_container, dest_name, public)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except ItemNotExists:
        raise ItemNotFound('Object does not exist')
    return version_id
480

    
481

    
482
def get_int_parameter(p):
    """Convert `p` to a non-negative integer.

    Returns None when `p` is None, cannot be parsed as an integer or is
    negative.
    """
    if p is None:
        return p
    try:
        value = int(p)
    except ValueError:
        return None
    return None if value < 0 else value
491

    
492

    
493
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises LengthRequired when the header is missing or invalid.
    """
    length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if length is not None:
        return length
    raise LengthRequired('Missing or invalid Content-Length header')
498

    
499

    
500
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    `size` is the total size of the resource; it is used to resolve open
    ended ('N-') and suffix ('-N') ranges. No bounds checking is done here,
    so e.g. a suffix longer than `size` yields a negative offset.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compiled once per call instead of once per range spec.
    pattern = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')

    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        m = pattern.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset == '':
            # Suffix range: the last `upto` bytes.
            length = int(upto)
            ret.append((size - length, length))
            continue

        offset = int(offset)
        if upto == '':
            # Open ended range: from offset to the end.
            ret.append((offset, size - offset))
            continue

        upto = int(upto)
        if offset > upto:
            return None
        ret.append((offset, upto - offset + 1))

    return ret
536

    
537

    
538
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None
    if header == 'bytes */*':
        return (None, None, None)

    m = re.match(
        r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if m is None:
        return None

    offset = int(m.group('offset'))
    upto = int(m.group('upto')) if m.group('upto') != '' else None
    total = int(m.group('total')) if m.group('total') != '*' else None

    # Reject ranges that end before they start or exceed the total size.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
577

    
578

    
579
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Returns None when the header is absent, an empty dict when it is blank,
    or a dict with optional 'read' and 'write' lists of lower-cased
    holders. A '*' entry collapses its list to ['*'], and holders present in
    both lists are kept only in 'write'.

    Raises BadRequest on error.
    """

    header = request.META.get('HTTP_X_OBJECT_SHARING')
    if header is None:
        return None

    # TODO: Document or remove '~' replacing.
    header = header.replace('~', '').replace(' ', '')

    ret = {}
    if header == '':
        return ret

    for perm in header.split(';'):
        if perm.startswith('read='):
            access, value = 'read', perm[5:]
        elif perm.startswith('write='):
            access, value = 'write', perm[6:]
        else:
            raise BadRequest(
                'Bad X-Object-Sharing header value: missing prefix')

        # Deduplicate the holders through a set.
        holders = list(set(
            [v.replace(' ', '').lower() for v in value.split(',')]))
        if '' in holders:
            holders.remove('')
        if '*' in holders:
            holders = ['*']
        ret[access] = holders
        if len(holders) == 0:
            raise BadRequest(
                'Bad X-Object-Sharing header value: invalid length')

    # replace displayname with uuid
    if TRANSLATE_UUIDS:
        token = getattr(request, 'token', None)
        try:
            for access in ('read', 'write'):
                ret[access] = [replace_permissions_displayname(token, x)
                               for x in ret.get(access, [])]
        except ItemNotExists as e:
            raise BadRequest(
                'Bad X-Object-Sharing header value: unknown account: %s' % e)

    # Keep duplicates only in write list.
    writers = ret.get('write', [])
    readers = ret.get('read', [])
    dups = [x for x in readers if x in writers and x != '*']
    if dups:
        ret['read'] = [x for x in readers if x not in dups]
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret
644

    
645

    
646
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None when the header is absent, True for 'true' and False for
    'false' or an empty value (case and spaces are ignored).

    Raises BadRequest on error.
    """

    value = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if value is None:
        return None

    value = value.replace(' ', '').lower()
    if value == 'true':
        return True
    if value in ('false', ''):
        return False
    raise BadRequest('Bad X-Object-Public header value')
662

    
663

    
664
def raw_input_socket(request):
    """Return the socket for reading the rest of the request.

    Under mod_python this is ``request._req``; otherwise the 'wsgi.input'
    entry of the request environment. Raises NotImplemented when neither
    is available.
    """

    software = request.META.get('SERVER_SOFTWARE') or ''
    if software.startswith('mod_python'):
        return request._req
    environ = request.environ
    if 'wsgi.input' in environ:
        return environ['wsgi.input']
    raise NotImplemented('Unknown server software')
673

    
674
# Upper bound, in bytes, on the amount of request data that
# socket_read_iterator() accepts.
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB
675

    
676

    
677
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Raises BadRequest when the size limit is exceeded, on a malformed chunk
    size line or when the input ends prematurely.
    """

    sock = raw_input_socket(request)
    if length < 0:  # Chunked transfers
        # 'length' is only a flag here; count the received bytes separately
        # so that MAX_UPLOAD_SIZE is actually enforced.
        received = 0

        # Small version (server does the dechunking).
        server_software = request.META.get('SERVER_SOFTWARE') or ''
        if (request.environ.get('mod_wsgi.input_chunked', None) or
                server_software.startswith('gunicorn')):
            while True:
                data = sock.read(blocksize)
                if not data:
                    return
                received += len(data)
                if received > MAX_UPLOAD_SIZE:
                    raise BadRequest('Maximum size is reached')
                yield data

        # Long version (do the dechunking).
        data = ''
        while True:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    char = sock.read(1)
                    if not char:
                        # EOF: stop here, the parsing below rejects the
                        # incomplete line.
                        break
                    chunk_length += char
            # Drop any chunk extension.
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                # int() tolerates the surrounding whitespace/CRLF.
                chunk_length = int(chunk_length, 16)
            except ValueError:
                raise BadRequest('Bad chunk size')
                                 # TODO: Change to something more appropriate.
            if chunk_length < 0:
                raise BadRequest('Bad chunk size')
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            received += chunk_length
            if received > MAX_UPLOAD_SIZE:
                raise BadRequest('Maximum size is reached')
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                if not chunk:
                    # Premature end of input inside a chunk.
                    raise BadRequest()
                chunk_length -= len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2)  # CRLF
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
741

    
742

    
743
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Incoming data is buffered and stored through ``backend.put_block`` in
    pieces of ``backend.block_size`` bytes. The values returned by
    put_block are collected in ``file.hashmap`` and the MD5 of the stored
    data is exposed as ``file.etag``.
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store the first `length` buffered bytes as one block.

        Does nothing when fewer than `length` bytes are buffered.
        """
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the buffer, digest and file object for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(
            name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer `raw_data` and store every complete block."""
        self.data += raw_data
        self.file.size += len(raw_data)
        block_size = self.backend.block_size
        # A single chunk may span several blocks; flush all of them so that
        # no stored block exceeds the backend block size.
        while block_size > 0 and len(self.data) >= block_size:
            self.put_data(block_size)
        # NOTE(review): per the Django upload handler API, returning None
        # presumably keeps the data from later handlers - confirm.
        return None

    def file_complete(self, file_size):
        """Store any remaining data and return the uploaded file object."""
        remaining = len(self.data)
        if remaining > 0:
            self.put_data(remaining)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
777

    
778

    
779
class ObjectWrapper(object):
    """Iterator that serves an object's data one block piece at a time.

    ``ranges`` is a list of ``(offset, length)`` pairs.  ``sizes`` and
    ``hashmaps`` are parallel lists with one entry per file making up the
    object.  With a single range only data is produced; with several
    ranges the data of each range is preceded by a multipart part header
    built from ``boundary`` and the stream ends with a closing footer.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        # Read cursor.  A block_hash of -1 means no block is cached yet.
        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        # A range_index of -1 means no part header has been emitted yet.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of the current range.

        Raises StopIteration once the current range is exhausted and
        ItemNotFound when the backend does not have the needed block.
        """
        if self.length <= 0:
            raise StopIteration

        # Skip over the files that end before the current offset; the
        # offset becomes relative to the file it falls in.
        while self.offset >= self.sizes[self.file_index]:
            self.offset -= self.sizes[self.file_index]
            self.file_index += 1

        block_size = self.backend.block_size

        # Locate the block holding the offset; fetch it unless cached.
        self.block_index = self.offset // block_size
        file_hashmap = self.hashmaps[self.file_index]
        wanted_hash = file_hashmap[self.block_index]
        if self.block_hash != wanted_hash:
            self.block_hash = wanted_hash
            try:
                self.block = self.backend.get_block(wanted_hash)
            except ItemNotExists:
                raise ItemNotFound('Block does not exist')

        # The last block of a file only counts up to the file's end.
        usable = block_size
        if self.block_index == len(file_hashmap) - 1:
            tail = self.sizes[self.file_index] % block_size
            if tail:
                usable = tail

        start = self.offset % block_size
        count = min(self.length, usable - start)
        piece = self.block[start:start + count]
        self.offset += count
        self.length -= count
        return piece

    def next(self):
        """Return the next chunk of the response body."""
        range_count = len(self.ranges)
        if range_count == 1:
            return self.part_iterator()
        if self.range_index == range_count:
            raise StopIteration

        # Serve data from the current range, if one has been started.
        if self.range_index != -1:
            try:
                return self.part_iterator()
            except StopIteration:
                pass

        # The current range is done (or none was started): move on.
        self.range_index += 1
        if self.range_index >= range_count:
            # Footer.
            return '\r\n'.join(['', '--' + self.boundary + '--', ''])

        # Part header.
        self.offset, self.length = self.ranges[self.range_index]
        self.file_index = 0
        header = [
            '--' + self.boundary,
            'Content-Range: bytes %d-%d/%d' % (
                self.offset, self.offset + self.length - 1, self.size),
            'Content-Transfer-Encoding: binary',
            '',
            '',
        ]
        if self.range_index > 0:
            # Separate this header from the previous part's data.
            header.insert(0, '')
        return '\r\n'.join(header)
868

    
869

    
870
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Build the HttpResponse that streams the object's data.

    Replies with 200 and the whole object unless the request carries
    ranges, in which case the reply is 206 (a single range or a
    multipart/byteranges body).  Raises RangeNotSatisfiable when any
    requested range falls outside the object.
    """

    # Range handling.
    total = sum(sizes)
    status = 200
    ranges = get_range(request, total)
    if ranges is None:
        ranges = [(0, total)]
    else:
        out_of_bounds = any(
            length <= 0 or length > total or
            offset < 0 or offset >= total or
            offset + length > total
            for offset, length in ranges)
        if out_of_bounds:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        status = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # A modification time may have been passed instead.
                stale = parse_http_date(if_range) != meta['modified']
            except ValueError:
                # Not a date: compare against the checksum.
                stale = if_range != meta['checksum']
            if stale:
                # The validator does not match: send the whole object.
                ranges = [(0, total)]
                status = 200

    boundary = ''
    if status == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=status)
    put_object_headers(
        response, meta, restricted=public,
        token=getattr(request, 'token', None))
    if status != 206:
        return response

    if len(ranges) == 1:
        offset, length = ranges[0]
        # Update with the correct length.
        response['Content-Length'] = length
        response['Content-Range'] = 'bytes %d-%d/%d' % (
            offset, offset + length - 1, total)
    else:
        del(response['Content-Length'])
        response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
            boundary,)
    return response
920

    
921

    
922
def put_object_block(request, hashmap, data, offset):
    """Write at most one block's worth of ``data`` at ``offset``.

    An existing block is updated in place in ``hashmap``; otherwise a new
    block, zero-padded up to the offset inside the block, is appended.
    Returns the number of bytes of ``data`` that were consumed.
    """

    backend = request.backend
    block_index, block_offset = divmod(offset, backend.block_size)
    written = min(len(data), backend.block_size - block_offset)
    chunk = data[:written]
    if block_index < len(hashmap):
        hashmap[block_index] = backend.update_block(
            hashmap[block_index], chunk, block_offset)
    else:
        padding = '\x00' * block_offset
        hashmap.append(backend.put_block(padding + chunk))
    return written  # Amount of data written.
933

    
934

    
935
def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    ``hashmap`` lists the block hashes of an object that is ``size`` bytes
    long.  Every block is read through ``backend.get_block`` and fed to
    the digest; the lowercase hex digest is returned.
    """

    # TODO: Search backend for the MD5 of another object with the same
    #       hashmap and size...
    md5 = hashlib.md5()
    block_size = backend.block_size
    last_index = len(hashmap) - 1
    for index, block_hash in enumerate(hashmap):
        data = backend.get_block(block_hash)  # Blocks come in padded.
        if index == last_index:
            # Keep only the bytes that belong to the object.  Slicing by
            # the bytes remaining (rather than ``size % block_size``) keeps
            # a full final block intact when ``size`` is an exact multiple
            # of the block size.
            data = data[:max(size - index * block_size, 0)]
        md5.update(data)
    return md5.hexdigest().lower()
947

    
948

    
949
def simple_list_response(request, l):
    """Serialize the list ``l`` according to ``request.serialization``.

    'text' gives one item per line, 'xml' renders the ``items.xml``
    template and 'json' gives a JSON array.  Any other value yields None.
    """
    serialization = request.serialization
    if serialization == 'text':
        return '\n'.join(l) + '\n'
    elif serialization == 'xml':
        return render_to_string('items.xml', {'items': l})
    elif serialization == 'json':
        return json.dumps(l)
    return None
956

    
957

    
958
from pithos.backends.util import PithosBackendPool
959
# Passed to PithosBackendPool as the pool's ``size``.
POOL_SIZE = 5

# The RADOS pool names are only passed on when RADOS storage is enabled;
# otherwise both entries are None.
BLOCK_PARAMS = {
    'mappool': RADOS_POOL_MAPS if RADOS_STORAGE else None,
    'blockpool': RADOS_POOL_BLOCKS if RADOS_STORAGE else None,
}

# Module-wide pool from which get_backend() draws backend instances.
_pithos_backend_pool = PithosBackendPool(
    size=POOL_SIZE,
    db_module=BACKEND_DB_MODULE,
    db_connection=BACKEND_DB_CONNECTION,
    block_module=BACKEND_BLOCK_MODULE,
    block_path=BACKEND_BLOCK_PATH,
    block_umask=BACKEND_BLOCK_UMASK,
    queue_module=BACKEND_QUEUE_MODULE,
    queue_hosts=BACKEND_QUEUE_HOSTS,
    queue_exchange=BACKEND_QUEUE_EXCHANGE,
    quotaholder_url=QUOTAHOLDER_URL,
    quotaholder_token=QUOTAHOLDER_TOKEN,
    free_versioning=BACKEND_FREE_VERSIONING,
    block_params=BLOCK_PARAMS,
)
983

    
984
def get_backend():
    """Take a backend from the pool and prepare it for a new request.

    The default quota and versioning policies are set from the module
    settings and the backend's message list is reset.
    """
    backend = _pithos_backend_pool.pool_get()
    policy = backend.default_policy
    policy['quota'] = BACKEND_QUOTA
    policy['versioning'] = BACKEND_VERSIONING
    backend.messages = []
    return backend
990

    
991

    
992
def update_request_headers(request):
    """Validate the HTTP_* request headers and decode URL-encoded ones.

    Raises BadRequest when a header name or value is not ASCII.  A header
    whose name or value contains '%' is replaced in ``request.META`` by
    its unquoted name mapped to its unquoted value.
    """
    # Work on a snapshot, since request.META is modified in the loop.
    headers = dict(
        (name, value) for name, value in request.META.iteritems()
        if name.startswith('HTTP_'))
    for name, value in headers.iteritems():
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' not in name and '%' not in value:
            continue
        # Handle URL-encoded keys and values.
        del(request.META[name])
        decoded = smart_unicode(unquote(value), strings_only=True)
        request.META[unquote(name)] = decoded
1006

    
1007

    
1008
def update_response_headers(request, response):
    """Finalize Content-Type, Content-Length and header encoding.

    The content type follows ``request.serialization`` for 'xml' and
    'json' and falls back to plain text when the response has none.  A
    missing Content-Length is filled in from the response content, except
    for multipart/byteranges responses.  Finally the X-Account-*,
    X-Container-*, X-Object-* and Content-* headers are URL-encoded.
    """
    content_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    if request.serialization in content_types:
        response['Content-Type'] = content_types[request.serialization]
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if not response.has_header('Content-Length'):
        multipart = (
            response.has_header('Content-Type') and
            response['Content-Type'].startswith('multipart/byteranges'))
        if not multipart:
            response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    encoded_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    for name, value in response.items():
        if name.startswith(encoded_prefixes):
            del(response[name])
            response[quote(name)] = quote(value, safe='/=,:@; ')
1028

    
1029

    
1030
def render_fault(request, fault):
    """Render ``fault`` as a plain-text HttpResponse.

    The body is the fault's message, followed by its details when there
    are any, and the response status is ``fault.code``.  For an
    InternalServerError with ``settings.DEBUG`` on, the details are
    replaced by the traceback of the exception currently being handled.
    """
    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # format_exc() takes an optional frame ``limit``, not an exception;
        # it always formats the exception currently being handled.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
1041

    
1042

    
1043
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    The result is always 'text' unless 'format_allowed' is True.  In that
    case a 'format' query parameter of 'json' or 'xml' wins; otherwise the
    first recognised media type in the Accept header decides, and 'text'
    is the fallback.
    """

    if not format_allowed:
        return 'text'

    by_parameter = {'json': 'json', 'xml': 'xml'}
    chosen = by_parameter.get(request.GET.get('format'))
    if chosen is not None:
        return chosen

    by_media_type = {
        'application/json': 'json',
        'application/xml': 'xml',
        'text/xml': 'xml',
    }
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        # Drop any parameters (e.g. ';q=0.9') after the media type.
        media_type = item.strip().partition(';')[0]
        if media_type in by_media_type:
            return by_media_type[media_type]

    return 'text'
1066

    
1067
def get_pithos_usage(usage):
    """Return the first entry of ``usage`` named 'pithos+.diskspace'.

    Returns None when no entry carries that name.
    """
    for entry in usage:
        if entry.get('name') != 'pithos+.diskspace':
            continue
        return entry
    return None
1071

    
1072
def api_method(http_method=None, format_allowed=False, user_required=True,
               request_usage=False):
    """Decorator function for views that implement an API method.

    The wrapped view runs after the HTTP method check, the optional user
    authentication, the name-length checks and the header processing.
    Faults and unexpected errors are turned into responses through
    render_fault, and the request's backend is closed in every case.
    """

    # (position in args, maximum length, error message); args may hold up
    # to (account, container, object).
    name_limits = (
        (1, 256, 'Container name too large.'),
        (2, 1024, 'Object name too large.'),
    )

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                if user_required:
                    token = None
                    # Read-only requests may carry the token in a cookie
                    # of the form "<account>|<token>".
                    if (request.method in ('HEAD', 'GET') and
                            COOKIE_NAME in request.COOKIES):
                        cookie_value = unquote(
                            request.COOKIES.get(COOKIE_NAME, ''))
                        token = cookie_value.partition('|')[2]
                    get_user(request,
                             AUTHENTICATION_URL,
                             AUTHENTICATION_USERS,
                             token,
                             request_usage)
                    if getattr(request, 'user', None) is None:
                        raise Unauthorized('Access denied')
                    assert getattr(request, 'user_uniq', None) != None
                    usage = request.user.get('usage', [])
                    request.user_usage = get_pithos_usage(usage)
                    header_token = request.META.get(
                        'HTTP_X_AUTH_TOKEN', token)
                    request.token = request.GET.get(
                        'X-Auth-Token', header_token)

                for position, limit, message in name_limits:
                    if len(args) > position and len(args[position]) > limit:
                        raise BadRequest(message)

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(
                    request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                if fault.code >= 500:
                    logger.exception("API Fault")
                return render_fault(request, fault)
            except BaseException as e:
                logger.exception('Unexpected error: %s' % e)
                fault = InternalServerError('Unexpected error: %s' % e)
                return render_fault(request, fault)
            finally:
                # Close the backend if one was attached to the request.
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator