root / snf-pithos-app / pithos / api / util.py @ 7273ee62

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52
from synnefo.lib.astakos import get_user
53

    
54
from pithos.api.faults import (
55
    Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
56
    Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
57
    RangeNotSatisfiable, InternalServerError, NotImplemented)
58
from pithos.api.short_url import encode_url
59
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
60
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
61
                                 BACKEND_BLOCK_UMASK,
62
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
63
                                 BACKEND_QUEUE_EXCHANGE, USE_QUOTAHOLDER,
64
                                 QUOTAHOLDER_URL, QUOTAHOLDER_TOKEN,
65
                                 QUOTAHOLDER_POOLSIZE,
66
                                 BACKEND_QUOTA, BACKEND_VERSIONING,
67
                                 BACKEND_FREE_VERSIONING,
68
                                 AUTHENTICATION_URL, AUTHENTICATION_USERS,
69
                                 COOKIE_NAME, USER_CATALOG_URL,
70
                                 RADOS_STORAGE, RADOS_POOL_BLOCKS,
71
                                 RADOS_POOL_MAPS, TRANSLATE_UUIDS)
72
from pithos.backends import connect_backend
73
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
74
                                  VersionNotExists)
75
from synnefo.lib.astakos import (get_user_uuid, get_displayname,
76
                                 get_uuids, get_displaynames)
77

    
78
import logging
79
import re
80
import hashlib
81
import uuid
82
import decimal
83

    
84
logger = logging.getLogger(__name__)
85

    
86

    
87
class UTC(tzinfo):
88
    def utcoffset(self, dt):
89
        return timedelta(0)
90

    
91
    def tzname(self, dt):
92
        return 'UTC'
93

    
94
    def dst(self, dt):
95
        return timedelta(0)
96

    
97

    
98
def json_encode_decimal(obj):
99
    if isinstance(obj, decimal.Decimal):
100
        return str(obj)
101
    raise TypeError(repr(obj) + " is not JSON serializable")
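
# Illustrative usage (hypothetical call): json_encode_decimal is intended as
# the 'default' hook of json.dumps, so Decimal values are emitted as strings
# instead of raising TypeError, e.g.
#
#   json.dumps({'quota': decimal.Decimal('107374182400')},
#              default=json_encode_decimal)
#   # -> '{"quota": "107374182400"}'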
102

    
103

    
104
def isoformat(d):
105
    """Return an ISO8601 date string that includes a timezone."""
106

    
107
    return d.replace(tzinfo=UTC()).isoformat()
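
# Example (illustrative): isoformat(datetime(2012, 3, 1, 12, 0, 0)) returns
# '2012-03-01T12:00:00+00:00'; the naive timestamp is simply labelled as UTC.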
108

    
109

    
110
def rename_meta_key(d, old, new):
111
    if old not in d:
112
        return
113
    d[new] = d[old]
114
    del(d[old])
115

    
116

    
117
def printable_header_dict(d):
118
    """Format a meta dictionary for printing out json/xml.
119

120
    Convert all keys to lower case and replace dashes with underscores.
121
    Format 'last_modified' timestamp.
122
    """
123

    
124
    if 'last_modified' in d and d['last_modified']:
125
        d['last_modified'] = isoformat(
126
            datetime.fromtimestamp(d['last_modified']))
127
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
128

    
129

    
130
def format_header_key(k):
131
    """Convert underscores to dashes and capitalize intra-dash strings."""
132
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
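
# Example (illustrative): underscore- and dash-separated keys are normalized
# to the canonical header spelling, e.g.
#   format_header_key('x_account_group_admins')  -> 'X-Account-Group-Admins'
#   format_header_key('CONTENT-TYPE')            -> 'Content-Type'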
133

    
134

    
135
def get_header_prefix(request, prefix):
136
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""
137

    
138
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
139
    # TODO: Document or remove '~' replacing.
140
    return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])
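
# Example (illustrative, hypothetical header): a request carrying
# 'X-Object-Meta-Color: blue' shows up in Django as
# request.META['HTTP_X_OBJECT_META_COLOR'] == 'blue', and
# get_header_prefix(request, 'X-Object-Meta-') then returns
# {'X-Object-Meta-Color': 'blue'}.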
141

    
142

    
143
def check_meta_headers(meta):
144
    if len(meta) > 90:
145
        raise BadRequest('Too many headers.')
146
    for k, v in meta.iteritems():
147
        if len(k) > 128:
148
            raise BadRequest('Header name too large.')
149
        if len(v) > 256:
150
            raise BadRequest('Header value too large.')
151

    
152

    
153
def get_account_headers(request):
154
    meta = get_header_prefix(request, 'X-Account-Meta-')
155
    check_meta_headers(meta)
156
    groups = {}
157
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
158
        n = k[16:].lower()
159
        if '-' in n or '_' in n:
160
            raise BadRequest('Bad characters in group name')
161
        groups[n] = v.replace(' ', '').split(',')
162
        while '' in groups[n]:
163
            groups[n].remove('')
164
    return meta, groups
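
# Example (illustrative, hypothetical group): a request header
# 'X-Account-Group-Team: uuid-1,uuid-2' is returned in groups as
# {'team': ['uuid-1', 'uuid-2']}; group names containing '-' or '_' are
# rejected with BadRequest.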
165

    
166

    
167
def put_account_headers(response, meta, groups, policy):
168
    if 'count' in meta:
169
        response['X-Account-Container-Count'] = meta['count']
170
    if 'bytes' in meta:
171
        response['X-Account-Bytes-Used'] = meta['bytes']
172
    response['Last-Modified'] = http_date(int(meta['modified']))
173
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
174
        response[smart_str(
175
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
176
    if 'until_timestamp' in meta:
177
        response['X-Account-Until-Timestamp'] = http_date(
178
            int(meta['until_timestamp']))
179
    for k, v in groups.iteritems():
180
        k = smart_str(k, strings_only=True)
181
        k = format_header_key('X-Account-Group-' + k)
182
        v = smart_str(','.join(v), strings_only=True)
183
        response[k] = v
184
    for k, v in policy.iteritems():
185
        response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
186

    
187

    
188
def get_container_headers(request):
189
    meta = get_header_prefix(request, 'X-Container-Meta-')
190
    check_meta_headers(meta)
191
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
192
    return meta, policy
193

    
194

    
195
def put_container_headers(request, response, meta, policy):
196
    if 'count' in meta:
197
        response['X-Container-Object-Count'] = meta['count']
198
    if 'bytes' in meta:
199
        response['X-Container-Bytes-Used'] = meta['bytes']
200
    response['Last-Modified'] = http_date(int(meta['modified']))
201
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
202
        response[smart_str(
203
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
204
    l = [smart_str(x, strings_only=True) for x in meta['object_meta']
205
         if x.startswith('X-Object-Meta-')]
206
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
207
    response['X-Container-Block-Size'] = request.backend.block_size
208
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
209
    if 'until_timestamp' in meta:
210
        response['X-Container-Until-Timestamp'] = http_date(
211
            int(meta['until_timestamp']))
212
    for k, v in policy.iteritems():
213
        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
214

    
215

    
216
def get_object_headers(request):
217
    content_type = request.META.get('CONTENT_TYPE', None)
218
    meta = get_header_prefix(request, 'X-Object-Meta-')
219
    check_meta_headers(meta)
220
    if request.META.get('HTTP_CONTENT_ENCODING'):
221
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
222
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
223
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
224
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
225
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
226
    return content_type, meta, get_sharing(request), get_public(request)
227

    
228

    
229
def put_object_headers(response, meta, restricted=False, token=None):
230
    response['ETag'] = meta['checksum']
231
    response['Content-Length'] = meta['bytes']
232
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
233
    response['Last-Modified'] = http_date(int(meta['modified']))
234
    if not restricted:
235
        response['X-Object-Hash'] = meta['hash']
236
        response['X-Object-UUID'] = meta['uuid']
237
        if TRANSLATE_UUIDS:
238
            meta['modified_by'] = retrieve_displayname(token, meta['modified_by'])
239
        response['X-Object-Modified-By'] = smart_str(
240
            meta['modified_by'], strings_only=True)
241
        response['X-Object-Version'] = meta['version']
242
        response['X-Object-Version-Timestamp'] = http_date(
243
            int(meta['version_timestamp']))
244
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
245
            response[smart_str(
246
                k, strings_only=True)] = smart_str(meta[k], strings_only=True)
247
        for k in (
248
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
249
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
250
                'X-Object-Public'):
251
            if k in meta:
252
                response[k] = smart_str(meta[k], strings_only=True)
253
    else:
254
        for k in ('Content-Encoding', 'Content-Disposition'):
255
            if k in meta:
256
                response[k] = smart_str(meta[k], strings_only=True)
257

    
258

    
259
def update_manifest_meta(request, v_account, meta):
260
    """Update metadata if the object has an X-Object-Manifest."""
261

    
262
    if 'X-Object-Manifest' in meta:
263
        etag = ''
264
        bytes = 0
265
        try:
266
            src_container, src_name = split_container_object_string(
267
                '/' + meta['X-Object-Manifest'])
268
            objects = request.backend.list_objects(
269
                request.user_uniq, v_account,
270
                src_container, prefix=src_name, virtual=False)
271
            for x in objects:
272
                src_meta = request.backend.get_object_meta(request.user_uniq,
273
                                                           v_account, src_container, x[0], 'pithos', x[1])
274
                etag += src_meta['checksum']
275
                bytes += src_meta['bytes']
276
        except:
277
            # Ignore errors.
278
            return
279
        meta['bytes'] = bytes
280
        md5 = hashlib.md5()
281
        md5.update(etag)
282
        meta['checksum'] = md5.hexdigest().lower()
283

    
284
def is_uuid(str):
285
    if str is None:
286
        return False
287
    try:
288
        uuid.UUID(str)
289
    except ValueError:
290
        return False
291
    else:
292
        return True
293

    
294
##########################
295
# USER CATALOG utilities #
296
##########################
297

    
298
def retrieve_displayname(token, uuid, fail_silently=True):
299
    displayname = get_displayname(
300
            token, uuid, USER_CATALOG_URL, AUTHENTICATION_USERS)
301
    if not displayname and not fail_silently:
302
        raise ItemNotExists(uuid)
303
    elif not displayname:
304
        # just return the uuid
305
        return uuid
306
    return displayname
307

    
308
def retrieve_displaynames(token, uuids, return_dict=False, fail_silently=True):
309
    catalog = get_displaynames(
310
            token, uuids, USER_CATALOG_URL, AUTHENTICATION_USERS) or {}
311
    missing = list(set(uuids) - set(catalog))
312
    if missing and not fail_silently:
313
        raise ItemNotExists('Unknown displaynames: %s' % ', '.join(missing))
314
    return catalog if return_dict else [catalog.get(i) for i in uuids]
315

    
316
def retrieve_uuid(token, displayname):
317
    if is_uuid(displayname):
318
        return displayname
319

    
320
    uuid = get_user_uuid(
321
        token, displayname, USER_CATALOG_URL, AUTHENTICATION_USERS)
322
    if not uuid:
323
        raise ItemNotExists(displayname)
324
    return uuid
325

    
326
def retrieve_uuids(token, displaynames, return_dict=False, fail_silently=True):
327
    catalog = get_uuids(
328
            token, displaynames, USER_CATALOG_URL, AUTHENTICATION_USERS) or {}
329
    missing = list(set(displaynames) - set(catalog))
330
    if missing and not fail_silently:
331
        raise ItemNotExists('Unknown uuids: %s' % ', '.join(missing))
332
    return catalog if return_dict else [catalog.get(i) for i in displaynames]
333

    
334
def replace_permissions_displayname(token, holder):
335
    if holder == '*':
336
        return holder
337
    try:
338
        # check first for a group permission
339
        account, group = holder.split(':', 1)
340
    except ValueError:
341
        return retrieve_uuid(token, holder)
342
    else:
343
        return ':'.join([retrieve_uuid(token, account), group])
344

    
345
def replace_permissions_uuid(token, holder):
346
    if holder == '*':
347
        return holder
348
    try:
349
        # check first for a group permission
350
        account, group = holder.split(':', 1)
351
    except ValueError:
352
        return retrieve_displayname(token, holder)
353
    else:
354
        return ':'.join([retrieve_displayname(token, account), group])
355

    
356
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
357
    if permissions is None:
358
        return
359
    allowed, perm_path, perms = permissions
360
    if len(perms) == 0:
361
        return
362

    
363
    # replace uuid with displayname
364
    if TRANSLATE_UUIDS:
365
        perms['read'] = [replace_permissions_uuid(
366
                getattr(request, 'token', None), x) \
367
                    for x in perms.get('read', [])]
368
        perms['write'] = [replace_permissions_uuid(
369
                getattr(request, 'token', None), x) \
370
                    for x in perms.get('write', [])]
371

    
372
    ret = []
373

    
374
    r = ','.join(perms.get('read', []))
375
    if r:
376
        ret.append('read=' + r)
377
    w = ','.join(perms.get('write', []))
378
    if w:
379
        ret.append('write=' + w)
380
    meta['X-Object-Sharing'] = '; '.join(ret)
381
    if '/'.join((v_account, v_container, v_object)) != perm_path:
382
        meta['X-Object-Shared-By'] = perm_path
383
    if request.user_uniq != v_account:
384
        meta['X-Object-Allowed-To'] = allowed
385

    
386

    
387
def update_public_meta(public, meta):
388
    if not public:
389
        return
390
    meta['X-Object-Public'] = '/public/' + encode_url(public)
391

    
392

    
393
def validate_modification_preconditions(request, meta):
394
    """Check that the modified timestamp conforms with the preconditions set."""
395

    
396
    if 'modified' not in meta:
397
        return  # TODO: Always return?
398

    
399
    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
400
    if if_modified_since is not None:
401
        if_modified_since = parse_http_date_safe(if_modified_since)
402
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
403
        raise NotModified('Resource has not been modified')
404

    
405
    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
406
    if if_unmodified_since is not None:
407
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
408
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
409
        raise PreconditionFailed('Resource has been modified')
410

    
411

    
412
def validate_matching_preconditions(request, meta):
413
    """Check that the ETag conforms with the preconditions set."""
414

    
415
    etag = meta['checksum']
416
    if not etag:
417
        etag = None
418

    
419
    if_match = request.META.get('HTTP_IF_MATCH')
420
    if if_match is not None:
421
        if etag is None:
422
            raise PreconditionFailed('Resource does not exist')
423
        if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
424
            raise PreconditionFailed('Resource ETag does not match')
425

    
426
    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
427
    if if_none_match is not None:
428
        # TODO: If this passes, must ignore If-Modified-Since header.
429
        if etag is not None:
430
            if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
431
                # TODO: Continue if an If-Modified-Since header is present.
432
                if request.method in ('HEAD', 'GET'):
433
                    raise NotModified('Resource ETag matches')
434
                raise PreconditionFailed('Resource exists or ETag matches')
435

    
436

    
437
def split_container_object_string(s):
438
    if len(s) == 0 or s[0] != '/':
439
        raise ValueError
440
    s = s[1:]
441
    pos = s.find('/')
442
    if pos == -1 or pos == len(s) - 1:
443
        raise ValueError
444
    return s[:pos], s[(pos + 1):]
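
# Example (illustrative): split_container_object_string('/pithos/docs/report.txt')
# returns ('pithos', 'docs/report.txt'); a missing leading slash or an empty
# object part (e.g. '/pithos/' or '/pithos') raises ValueError.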
445

    
446

    
447
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False, delimiter=None):
448
    """Copy or move an object."""
449

    
450
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
451
        del(request.META['CONTENT_TYPE'])
452
    content_type, meta, permissions, public = get_object_headers(request)
453
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
454
    try:
455
        if move:
456
            version_id = request.backend.move_object(
457
                request.user_uniq, src_account, src_container, src_name,
458
                dest_account, dest_container, dest_name,
459
                content_type, 'pithos', meta, False, permissions, delimiter)
460
        else:
461
            version_id = request.backend.copy_object(
462
                request.user_uniq, src_account, src_container, src_name,
463
                dest_account, dest_container, dest_name,
464
                content_type, 'pithos', meta, False, permissions, src_version, delimiter)
465
    except NotAllowedError:
466
        raise Forbidden('Not allowed')
467
    except (ItemNotExists, VersionNotExists):
468
        raise ItemNotFound('Container or object does not exist')
469
    except ValueError:
470
        raise BadRequest('Invalid sharing header')
471
    except QuotaError, e:
472
        raise RequestEntityTooLarge('Quota error: %s' % e)
473
    if public is not None:
474
        try:
475
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
476
        except NotAllowedError:
477
            raise Forbidden('Not allowed')
478
        except ItemNotExists:
479
            raise ItemNotFound('Object does not exist')
480
    return version_id
481

    
482

    
483
def get_int_parameter(p):
484
    if p is not None:
485
        try:
486
            p = int(p)
487
        except ValueError:
488
            return None
489
        if p < 0:
490
            return None
491
    return p
492

    
493

    
494
def get_content_length(request):
495
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
496
    if content_length is None:
497
        raise LengthRequired('Missing or invalid Content-Length header')
498
    return content_length
499

    
500

    
501
def get_range(request, size):
502
    """Parse a Range header from the request.
503

504
    Either returns None, when the header is missing or should be ignored,
505
    or a list of (offset, length) tuples - should be further checked.
506
    """
507

    
508
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
509
    if not ranges.startswith('bytes='):
510
        return None
511

    
512
    ret = []
513
    for r in (x.strip() for x in ranges[6:].split(',')):
514
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
515
        m = p.match(r)
516
        if not m:
517
            return None
518
        offset = m.group('offset')
519
        upto = m.group('upto')
520
        if offset == '' and upto == '':
521
            return None
522

    
523
        if offset != '':
524
            offset = int(offset)
525
            if upto != '':
526
                upto = int(upto)
527
                if offset > upto:
528
                    return None
529
                ret.append((offset, upto - offset + 1))
530
            else:
531
                ret.append((offset, size - offset))
532
        else:
533
            length = int(upto)
534
            ret.append((size - length, length))
535

    
536
    return ret
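
# Example (illustrative): for an object of total size 10000, the header
# 'Range: bytes=0-499,-500' parses to [(0, 500), (9500, 500)], i.e. a list of
# (offset, length) pairs; a malformed specification yields None, in which case
# the caller serves the whole object.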
537

    
538

    
539
def get_content_range(request):
540
    """Parse a Content-Range header from the request.
541

542
    Either returns None, when the header is missing or should be ignored,
543
    or an (offset, length, total) tuple - check as length, total may be None.
544
    Returns (None, None, None) if the provided range is '*/*'.
545
    """
546

    
547
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
548
    if not ranges:
549
        return None
550

    
551
    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
552
    m = p.match(ranges)
553
    if not m:
554
        if ranges == 'bytes */*':
555
            return (None, None, None)
556
        return None
557
    offset = int(m.group('offset'))
558
    upto = m.group('upto')
559
    total = m.group('total')
560
    if upto != '':
561
        upto = int(upto)
562
    else:
563
        upto = None
564
    if total != '*':
565
        total = int(total)
566
    else:
567
        total = None
568
    if (upto is not None and offset > upto) or \
569
        (total is not None and offset >= total) or \
570
            (total is not None and upto is not None and upto >= total):
571
        return None
572

    
573
    if upto is None:
574
        length = None
575
    else:
576
        length = upto - offset + 1
577
    return (offset, length, total)
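
# Example (illustrative): 'Content-Range: bytes 0-499/10000' parses to
# (0, 500, 10000); 'bytes 500-/*' gives (500, None, None) because both the
# end and the total may be left open, and the special form 'bytes */*' gives
# (None, None, None).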
578

    
579

    
580
def get_sharing(request):
581
    """Parse an X-Object-Sharing header from the request.
582

583
    Raises BadRequest on error.
584
    """
585

    
586
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
587
    if permissions is None:
588
        return None
589

    
590
    # TODO: Document or remove '~' replacing.
591
    permissions = permissions.replace('~', '')
592

    
593
    ret = {}
594
    permissions = permissions.replace(' ', '')
595
    if permissions == '':
596
        return ret
597
    for perm in (x for x in permissions.split(';')):
598
        if perm.startswith('read='):
599
            ret['read'] = list(set(
600
                [v.replace(' ', '').lower() for v in perm[5:].split(',')]))
601
            if '' in ret['read']:
602
                ret['read'].remove('')
603
            if '*' in ret['read']:
604
                ret['read'] = ['*']
605
            if len(ret['read']) == 0:
606
                raise BadRequest(
607
                    'Bad X-Object-Sharing header value: invalid length')
608
        elif perm.startswith('write='):
609
            ret['write'] = list(set(
610
                [v.replace(' ', '').lower() for v in perm[6:].split(',')]))
611
            if '' in ret['write']:
612
                ret['write'].remove('')
613
            if '*' in ret['write']:
614
                ret['write'] = ['*']
615
            if len(ret['write']) == 0:
616
                raise BadRequest(
617
                    'Bad X-Object-Sharing header value: invalid length')
618
        else:
619
            raise BadRequest(
620
                'Bad X-Object-Sharing header value: missing prefix')
621

    
622
    # replace displayname with uuid
623
    if TRANSLATE_UUIDS:
624
        try:
625
            ret['read'] = [replace_permissions_displayname(
626
                    getattr(request, 'token', None), x) \
627
                        for x in ret.get('read', [])]
628
            ret['write'] = [replace_permissions_displayname(
629
                    getattr(request, 'token', None), x) \
630
                        for x in ret.get('write', [])]
631
        except ItemNotExists, e:
632
            raise BadRequest(
633
                'Bad X-Object-Sharing header value: unknown account: %s' % e)
634

    
635
    # Keep duplicates only in write list.
636
    dups = [x for x in ret.get(
637
        'read', []) if x in ret.get('write', []) and x != '*']
638
    if dups:
639
        for x in dups:
640
            ret['read'].remove(x)
641
        if len(ret['read']) == 0:
642
            del(ret['read'])
643

    
644
    return ret
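
# Example (illustrative, hypothetical account name, TRANSLATE_UUIDS disabled):
# 'X-Object-Sharing: read=*;write=alice' parses to
# {'read': ['*'], 'write': ['alice']}; an empty header value yields an empty
# dict, and accounts listed in both lists are kept only under 'write'.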
645

    
646

    
647
def get_public(request):
648
    """Parse an X-Object-Public header from the request.
649

650
    Raises BadRequest on error.
651
    """
652

    
653
    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
654
    if public is None:
655
        return None
656

    
657
    public = public.replace(' ', '').lower()
658
    if public == 'true':
659
        return True
660
    elif public == 'false' or public == '':
661
        return False
662
    raise BadRequest('Bad X-Object-Public header value')
663

    
664

    
665
def raw_input_socket(request):
666
    """Return the socket for reading the rest of the request."""
667

    
668
    server_software = request.META.get('SERVER_SOFTWARE')
669
    if server_software and server_software.startswith('mod_python'):
670
        return request._req
671
    if 'wsgi.input' in request.environ:
672
        return request.environ['wsgi.input']
673
    raise NotImplemented('Unknown server software')
674

    
675
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB
676

    
677

    
678
def socket_read_iterator(request, length=0, blocksize=4096):
679
    """Return a maximum of blocksize data read from the socket in each iteration.
680

681
    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
682
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.
683
    """
684

    
685
    sock = raw_input_socket(request)
686
    if length < 0:  # Chunked transfers
687
        # Small version (server does the dechunking).
688
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
689
            while length < MAX_UPLOAD_SIZE:
690
                data = sock.read(blocksize)
691
                if data == '':
692
                    return
693
                yield data
694
            raise BadRequest('Maximum size is reached')
695

    
696
        # Long version (do the dechunking).
697
        data = ''
698
        while length < MAX_UPLOAD_SIZE:
699
            # Get chunk size.
700
            if hasattr(sock, 'readline'):
701
                chunk_length = sock.readline()
702
            else:
703
                chunk_length = ''
704
                while chunk_length[-1:] != '\n':
705
                    chunk_length += sock.read(1)
706
                chunk_length = chunk_length.strip()
707
            pos = chunk_length.find(';')
708
            if pos >= 0:
709
                chunk_length = chunk_length[:pos]
710
            try:
711
                chunk_length = int(chunk_length, 16)
712
            except Exception, e:
713
                raise BadRequest('Bad chunk size')
714
                                 # TODO: Change to something more appropriate.
715
            # Check if done.
716
            if chunk_length == 0:
717
                if len(data) > 0:
718
                    yield data
719
                return
720
            # Get the actual data.
721
            while chunk_length > 0:
722
                chunk = sock.read(min(chunk_length, blocksize))
723
                chunk_length -= len(chunk)
724
                if length > 0:
725
                    length += len(chunk)
726
                data += chunk
727
                if len(data) >= blocksize:
728
                    ret = data[:blocksize]
729
                    data = data[blocksize:]
730
                    yield ret
731
            sock.read(2)  # CRLF
732
        raise BadRequest('Maximum size is reached')
733
    else:
734
        if length > MAX_UPLOAD_SIZE:
735
            raise BadRequest('Maximum size is reached')
736
        while length > 0:
737
            data = sock.read(min(length, blocksize))
738
            if not data:
739
                raise BadRequest()
740
            length -= len(data)
741
            yield data
742

    
743

    
744
class SaveToBackendHandler(FileUploadHandler):
745
    """Handle a file from an HTML form the django way."""
746

    
747
    def __init__(self, request=None):
748
        super(SaveToBackendHandler, self).__init__(request)
749
        self.backend = request.backend
750

    
751
    def put_data(self, length):
752
        if len(self.data) >= length:
753
            block = self.data[:length]
754
            self.file.hashmap.append(self.backend.put_block(block))
755
            self.md5.update(block)
756
            self.data = self.data[length:]
757

    
758
    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
759
        self.md5 = hashlib.md5()
760
        self.data = ''
761
        self.file = UploadedFile(
762
            name=file_name, content_type=content_type, charset=charset)
763
        self.file.size = 0
764
        self.file.hashmap = []
765

    
766
    def receive_data_chunk(self, raw_data, start):
767
        self.data += raw_data
768
        self.file.size += len(raw_data)
769
        self.put_data(self.request.backend.block_size)
770
        return None
771

    
772
    def file_complete(self, file_size):
773
        l = len(self.data)
774
        if l > 0:
775
            self.put_data(l)
776
        self.file.etag = self.md5.hexdigest().lower()
777
        return self.file
778

    
779

    
780
class ObjectWrapper(object):
781
    """Return the object's data block-per-block in each iteration.
782

783
    Read from the object using the offset and length provided in each entry of the range list.
784
    """
785

    
786
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
787
        self.backend = backend
788
        self.ranges = ranges
789
        self.sizes = sizes
790
        self.hashmaps = hashmaps
791
        self.boundary = boundary
792
        self.size = sum(self.sizes)
793

    
794
        self.file_index = 0
795
        self.block_index = 0
796
        self.block_hash = -1
797
        self.block = ''
798

    
799
        self.range_index = -1
800
        self.offset, self.length = self.ranges[0]
801

    
802
    def __iter__(self):
803
        return self
804

    
805
    def part_iterator(self):
806
        if self.length > 0:
807
            # Get the file for the current offset.
808
            file_size = self.sizes[self.file_index]
809
            while self.offset >= file_size:
810
                self.offset -= file_size
811
                self.file_index += 1
812
                file_size = self.sizes[self.file_index]
813

    
814
            # Get the block for the current position.
815
            self.block_index = int(self.offset / self.backend.block_size)
816
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
817
                self.block_hash = self.hashmaps[
818
                    self.file_index][self.block_index]
819
                try:
820
                    self.block = self.backend.get_block(self.block_hash)
821
                except ItemNotExists:
822
                    raise ItemNotFound('Block does not exist')
823

    
824
            # Get the data from the block.
825
            bo = self.offset % self.backend.block_size
826
            bs = self.backend.block_size
827
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
828
                    self.sizes[self.file_index] % self.backend.block_size):
829
                bs = self.sizes[self.file_index] % self.backend.block_size
830
            bl = min(self.length, bs - bo)
831
            data = self.block[bo:bo + bl]
832
            self.offset += bl
833
            self.length -= bl
834
            return data
835
        else:
836
            raise StopIteration
837

    
838
    def next(self):
839
        if len(self.ranges) == 1:
840
            return self.part_iterator()
841
        if self.range_index == len(self.ranges):
842
            raise StopIteration
843
        try:
844
            if self.range_index == -1:
845
                raise StopIteration
846
            return self.part_iterator()
847
        except StopIteration:
848
            self.range_index += 1
849
            out = []
850
            if self.range_index < len(self.ranges):
851
                # Part header.
852
                self.offset, self.length = self.ranges[self.range_index]
853
                self.file_index = 0
854
                if self.range_index > 0:
855
                    out.append('')
856
                out.append('--' + self.boundary)
857
                out.append('Content-Range: bytes %d-%d/%d' % (
858
                    self.offset, self.offset + self.length - 1, self.size))
859
                out.append('Content-Transfer-Encoding: binary')
860
                out.append('')
861
                out.append('')
862
                return '\r\n'.join(out)
863
            else:
864
                # Footer.
865
                out.append('')
866
                out.append('--' + self.boundary + '--')
867
                out.append('')
868
                return '\r\n'.join(out)
869

    
870

    
871
def object_data_response(request, sizes, hashmaps, meta, public=False):
872
    """Get the HttpResponse object for replying with the object's data."""
873

    
874
    # Range handling.
875
    size = sum(sizes)
876
    ranges = get_range(request, size)
877
    if ranges is None:
878
        ranges = [(0, size)]
879
        ret = 200
880
    else:
881
        check = [True for offset, length in ranges if
882
                 length <= 0 or length > size or
883
                 offset < 0 or offset >= size or
884
                 offset + length > size]
885
        if len(check) > 0:
886
            raise RangeNotSatisfiable('Requested range exceeds object limits')
887
        ret = 206
888
        if_range = request.META.get('HTTP_IF_RANGE')
889
        if if_range:
890
            try:
891
                # Modification time has passed instead.
892
                last_modified = parse_http_date(if_range)
893
                if last_modified != meta['modified']:
894
                    ranges = [(0, size)]
895
                    ret = 200
896
            except ValueError:
897
                if if_range != meta['checksum']:
898
                    ranges = [(0, size)]
899
                    ret = 200
900

    
901
    if ret == 206 and len(ranges) > 1:
902
        boundary = uuid.uuid4().hex
903
    else:
904
        boundary = ''
905
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
906
    response = HttpResponse(wrapper, status=ret)
907
    put_object_headers(
908
            response, meta, restricted=public, token=getattr(request, 'token', None))
909
    if ret == 206:
910
        if len(ranges) == 1:
911
            offset, length = ranges[0]
912
            response[
913
                'Content-Length'] = length  # Update with the correct length.
914
            response['Content-Range'] = 'bytes %d-%d/%d' % (
915
                offset, offset + length - 1, size)
916
        else:
917
            del(response['Content-Length'])
918
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
919
                boundary,)
920
    return response
921

    
922

    
923
def put_object_block(request, hashmap, data, offset):
924
    """Put one block of data at the given offset."""
925

    
926
    bi = int(offset / request.backend.block_size)
927
    bo = offset % request.backend.block_size
928
    bl = min(len(data), request.backend.block_size - bo)
929
    if bi < len(hashmap):
930
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
931
    else:
932
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
933
    return bl  # Return amount of data written.
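
# Example (illustrative): with a 4 MiB block size, a write at offset 6 MiB
# lands in block index bi=1 at intra-block offset bo=2 MiB, and at most
# block_size - bo bytes are consumed per call, so callers are expected to loop
# over the data until everything has been written.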
934

    
935

    
936
def hashmap_md5(backend, hashmap, size):
937
    """Produce the MD5 sum from the data in the hashmap."""
938

    
939
    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
940
    md5 = hashlib.md5()
941
    bs = backend.block_size
942
    for bi, hash in enumerate(hashmap):
943
        data = backend.get_block(hash)  # Blocks come in padded.
944
        if bi == len(hashmap) - 1:
945
            data = data[:size % bs]
946
        md5.update(data)
947
    return md5.hexdigest().lower()
948

    
949

    
950
def simple_list_response(request, l):
951
    if request.serialization == 'text':
952
        return '\n'.join(l) + '\n'
953
    if request.serialization == 'xml':
954
        return render_to_string('items.xml', {'items': l})
955
    if request.serialization == 'json':
956
        return json.dumps(l)
957

    
958

    
959
from pithos.backends.util import PithosBackendPool
960
POOL_SIZE = 5
961
if RADOS_STORAGE:
962
    BLOCK_PARAMS = { 'mappool': RADOS_POOL_MAPS,
963
                     'blockpool': RADOS_POOL_BLOCKS,
964
                   }
965
else:
966
    BLOCK_PARAMS = { 'mappool': None,
967
                     'blockpool': None,
968
                   }
969

    
970

    
971
_pithos_backend_pool = PithosBackendPool(
972
        size=POOL_SIZE,
973
        db_module=BACKEND_DB_MODULE,
974
        db_connection=BACKEND_DB_CONNECTION,
975
        block_module=BACKEND_BLOCK_MODULE,
976
        block_path=BACKEND_BLOCK_PATH,
977
        block_umask=BACKEND_BLOCK_UMASK,
978
        queue_module=BACKEND_QUEUE_MODULE,
979
        queue_hosts=BACKEND_QUEUE_HOSTS,
980
        queue_exchange=BACKEND_QUEUE_EXCHANGE,
981
        quotaholder_enabled=USE_QUOTAHOLDER,
982
        quotaholder_url=QUOTAHOLDER_URL,
983
        quotaholder_token=QUOTAHOLDER_TOKEN,
984
        quotaholder_client_poolsize=QUOTAHOLDER_POOLSIZE,
985
        free_versioning=BACKEND_FREE_VERSIONING,
986
        block_params=BLOCK_PARAMS)
987

    
988
def get_backend():
989
    backend = _pithos_backend_pool.pool_get()
990
    backend.default_policy['quota'] = BACKEND_QUOTA
991
    backend.default_policy['versioning'] = BACKEND_VERSIONING
992
    backend.messages = []
993
    return backend
994

    
995

    
996
def update_request_headers(request):
997
    # Handle URL-encoded keys and values.
998
    meta = dict([(
999
        k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
1000
    for k, v in meta.iteritems():
1001
        try:
1002
            k.decode('ascii')
1003
            v.decode('ascii')
1004
        except UnicodeDecodeError:
1005
            raise BadRequest('Bad character in headers.')
1006
        if '%' in k or '%' in v:
1007
            del(request.META[k])
1008
            request.META[unquote(k)] = smart_unicode(unquote(
1009
                v), strings_only=True)
1010

    
1011

    
1012
def update_response_headers(request, response):
1013
    if request.serialization == 'xml':
1014
        response['Content-Type'] = 'application/xml; charset=UTF-8'
1015
    elif request.serialization == 'json':
1016
        response['Content-Type'] = 'application/json; charset=UTF-8'
1017
    elif not response['Content-Type']:
1018
        response['Content-Type'] = 'text/plain; charset=UTF-8'
1019

    
1020
    if (not response.has_header('Content-Length') and
1021
        not (response.has_header('Content-Type') and
1022
             response['Content-Type'].startswith('multipart/byteranges'))):
1023
        response['Content-Length'] = len(response.content)
1024

    
1025
    # URL-encode unicode in headers.
1026
    meta = response.items()
1027
    for k, v in meta:
1028
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
1029
                k.startswith('X-Object-') or k.startswith('Content-')):
1030
            del(response[k])
1031
            response[quote(k)] = quote(v, safe='/=,:@; ')
1032

    
1033

    
1034
def render_fault(request, fault):
1035
    if isinstance(fault, InternalServerError) and settings.DEBUG:
1036
        fault.details = format_exc()
1037

    
1038
    request.serialization = 'text'
1039
    data = fault.message + '\n'
1040
    if fault.details:
1041
        data += '\n' + fault.details
1042
    response = HttpResponse(data, status=fault.code)
1043
    update_response_headers(request, response)
1044
    return response
1045

    
1046

    
1047
def request_serialization(request, format_allowed=False):
1048
    """Return the serialization format requested.
1049

1050
    Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
1051
    """
1052

    
1053
    if not format_allowed:
1054
        return 'text'
1055

    
1056
    format = request.GET.get('format')
1057
    if format == 'json':
1058
        return 'json'
1059
    elif format == 'xml':
1060
        return 'xml'
1061

    
1062
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
1063
        accept, sep, rest = item.strip().partition(';')
1064
        if accept == 'application/json':
1065
            return 'json'
1066
        elif accept == 'application/xml' or accept == 'text/xml':
1067
            return 'xml'
1068

    
1069
    return 'text'
1070

    
1071
def get_pithos_usage(usage):
1072
    for u in usage:
1073
        if u.get('name') == 'pithos+.diskspace':
1074
            return u
1075

    
1076
def api_method(http_method=None, format_allowed=False, user_required=True,
1077
        request_usage=False):
1078
    """Decorator function for views that implement an API method."""
1079

    
1080
    def decorator(func):
1081
        @wraps(func)
1082
        def wrapper(request, *args, **kwargs):
1083
            try:
1084
                if http_method and request.method != http_method:
1085
                    raise BadRequest('Method not allowed.')
1086

    
1087
                if user_required:
1088
                    token = None
1089
                    if request.method in ('HEAD', 'GET') and COOKIE_NAME in request.COOKIES:
1090
                        cookie_value = unquote(
1091
                            request.COOKIES.get(COOKIE_NAME, ''))
1092
                        account, sep, token = cookie_value.partition('|')
1093
                    get_user(request,
1094
                             AUTHENTICATION_URL,
1095
                             AUTHENTICATION_USERS,
1096
                             token,
1097
                             request_usage)
1098
                    if getattr(request, 'user', None) is None:
1099
                        raise Unauthorized('Access denied')
1100
                    assert getattr(request, 'user_uniq', None) is not None
1101
                    request.user_usage = get_pithos_usage(request.user.get('usage', []))
1102
                    request.token = request.GET.get('X-Auth-Token', request.META.get('HTTP_X_AUTH_TOKEN', token))
1103

    
1104
                # The args variable may contain up to (account, container, object).
1105
                if len(args) > 1 and len(args[1]) > 256:
1106
                    raise BadRequest('Container name too large.')
1107
                if len(args) > 2 and len(args[2]) > 1024:
1108
                    raise BadRequest('Object name too large.')
1109

    
1110
                # Format and check headers.
1111
                update_request_headers(request)
1112

    
1113
                # Fill in custom request variables.
1114
                request.serialization = request_serialization(
1115
                    request, format_allowed)
1116
                request.backend = get_backend()
1117

    
1118
                response = func(request, *args, **kwargs)
1119
                update_response_headers(request, response)
1120
                return response
1121
            except Fault, fault:
1122
                if fault.code >= 500:
1123
                    logger.exception("API Fault")
1124
                return render_fault(request, fault)
1125
            except BaseException, e:
1126
                logger.exception('Unexpected error: %s' % e)
1127
                fault = InternalServerError('Unexpected error')
1128
                return render_fault(request, fault)
1129
            finally:
1130
                if getattr(request, 'backend', None) is not None:
1131
                    request.backend.close()
1132
        return wrapper
1133
    return decorator
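
# Illustrative sketch (hypothetical view, for reference only): a view wraps
# its handler with api_method and lets the decorator handle authentication,
# header normalization, serialization and fault rendering, e.g.
#
#   @api_method('GET', format_allowed=True)
#   def demo_view(request, v_account):
#       # request.user_uniq, request.serialization and request.backend have
#       # already been filled in by the decorator here.
#       return HttpResponse(status=204)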