Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 2042a902

History | View | Annotate | Download (39.7 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52
from synnefo.lib.astakos import get_user
53

    
54
from pithos.api.faults import (
55
    Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
56
    Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
57
    RangeNotSatisfiable, InternalServerError, NotImplemented)
58
from pithos.api.short_url import encode_url
59
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
60
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
61
                                 BACKEND_BLOCK_UMASK,
62
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
63
                                 BACKEND_QUEUE_EXCHANGE,
64
                                 QUOTAHOLDER_URL, QUOTAHOLDER_TOKEN,
65
                                 BACKEND_QUOTA, BACKEND_VERSIONING,
66
                                 BACKEND_FREE_VERSIONING,
67
                                 AUTHENTICATION_URL, AUTHENTICATION_USERS,
68
                                 SERVICE_TOKEN, COOKIE_NAME, USER_INFO_URL,
69
                                 RADOS_STORAGE, RADOS_POOL_BLOCKS,
70
                                 RADOS_POOL_MAPS)
71
from pithos.backends import connect_backend
72
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
73
                                  VersionNotExists)
74
from synnefo.lib.astakos import get_user_uuid, get_username
75

    
76
import logging
77
import re
78
import hashlib
79
import uuid
80
import decimal
81

    
82

    
83
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
84

    
85

    
86
class UTC(tzinfo):
    """Fixed tzinfo for UTC: zero offset, zero DST, name 'UTC'."""

    def utcoffset(self, dt):
        """Return a zero offset, whatever `dt` is."""
        return timedelta()

    def tzname(self, dt):
        """Return the zone name, always 'UTC'."""
        return 'UTC'

    def dst(self, dt):
        """Return a zero daylight-saving adjustment."""
        return timedelta()
95

    
96

    
97
def json_encode_decimal(obj):
    """Return the string form of a decimal.Decimal.

    Raises TypeError for any object that is not a decimal.Decimal.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
101

    
102

    
103
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone.

    The tzinfo of `d` is replaced by UTC; the date/time fields themselves
    are not converted.
    """
    stamped = d.replace(tzinfo=UTC())
    return stamped.isoformat()
107

    
108

    
109
def rename_meta_key(d, old, new):
    """Move the value stored under `old` to the key `new`, in place.

    Does nothing when `old` is not in `d`. A value already stored under
    `new` is overwritten.
    """
    if old not in d:
        return
    # Pop before assigning, so that renaming a key to itself keeps the
    # entry instead of deleting it.
    d[new] = d.pop(old)
114

    
115

    
116
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.

    A truthy 'last_modified' value is replaced in `d` itself by its
    ISO8601 string; the returned dictionary is a new object.
    """

    if 'last_modified' in d and d['last_modified']:
        # isoformat() stamps the value as UTC without converting it, so the
        # timestamp has to be expanded as UTC wall-clock time here rather
        # than as server-local time.
        d['last_modified'] = isoformat(
            datetime.utcfromtimestamp(d['last_modified']))
    # items() behaves the same on Python 2 and 3.
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.items()])
127

    
128

    
129
def format_header_key(k):
    """Return `k` with underscores turned into dashes and every
    dash-separated word capitalized."""
    words = k.replace('_', '-').split('-')
    return '-'.join(word.capitalize() for word in words)
132

    
133

    
134
def get_header_prefix(request, prefix):
    """Return a dict with all request headers that start with `prefix`.

    Header names are looked up in request.META (upper case, 'HTTP_' in
    front, dashes as underscores) and reformatted with
    format_header_key(). A header whose name is exactly the prefix is
    left out. Every '~' is removed from the values.
    """

    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for name, value in request.META.iteritems():
        if not name.startswith(meta_prefix) or len(name) <= len(meta_prefix):
            continue
        # TODO: Document or remove '~' replacing.
        # name[5:] drops the leading 'HTTP_'.
        headers[format_header_key(name[5:])] = value.replace('~', '')
    return headers
140

    
141

    
142
def check_meta_headers(meta):
    """Validate the amount and size of metadata headers.

    Raises BadRequest when `meta` has more than 90 entries, or when an
    entry has a name longer than 128 or a value longer than 256.
    """
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for name, value in meta.iteritems():
        limits = ((name, 128, 'Header name too large.'),
                  (value, 256, 'Header value too large.'))
        for text, limit, message in limits:
            if len(text) > limit:
                raise BadRequest(message)
150

    
151

    
152
def get_account_headers(request):
    """Read account metadata and group definitions from the request.

    Returns a (meta, groups) tuple: `meta` holds the validated
    X-Account-Meta-* headers and `groups` maps each lower-cased group
    name to the list of its non-empty, comma-separated members.

    Raises BadRequest if a group name contains '-' or '_'.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)
    group_headers = get_header_prefix(request, 'X-Account-Group-')
    groups = {}
    for header, members in group_headers.iteritems():
        # Drop the 16 characters of 'X-Account-Group-'.
        group = header[16:].lower()
        if '-' in group or '_' in group:
            raise BadRequest('Bad characters in group name')
        groups[group] = [
            m for m in members.replace(' ', '').split(',') if m != '']
    return meta, groups
164

    
165

    
166
def put_account_translation_headers(response, accounts):
    """Set one X-Account-Presentation-<account> response header per account.

    The header value is what retrieve_username() returns for the account.
    """
    for account in accounts:
        header = smart_str(
            'X-Account-Presentation-%s' % account, strings_only=True)
        response[header] = smart_str(
            retrieve_username(account), strings_only=True)
171

    
172

    
173
def put_account_headers(response, meta, groups, policy):
    """Fill `response` with the headers describing an account.

    Writes the container count and bytes used (when present in `meta`),
    Last-Modified, every X-Account-Meta-* entry, the optional
    until-timestamp, one X-Account-Group-* header per group and one
    X-Account-Policy-* header per policy item.
    """
    for key, header in (('count', 'X-Account-Container-Count'),
                        ('bytes', 'X-Account-Bytes-Used')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))
    for name in meta.keys():
        if name.startswith('X-Account-Meta-'):
            response[smart_str(name, strings_only=True)] = smart_str(
                meta[name], strings_only=True)
    if 'until_timestamp' in meta:
        until = int(meta['until_timestamp'])
        response['X-Account-Until-Timestamp'] = http_date(until)
    for group, members in groups.iteritems():
        header = format_header_key(
            'X-Account-Group-' + smart_str(group, strings_only=True))
        response[header] = smart_str(','.join(members), strings_only=True)
    for name, value in policy.iteritems():
        header = format_header_key('X-Account-Policy-' + name)
        response[smart_str(header, strings_only=True)] = smart_str(
            value, strings_only=True)
192

    
193

    
194
def get_container_headers(request):
    """Read container metadata and policy from the request.

    Returns a (meta, policy) tuple: `meta` holds the validated
    X-Container-Meta-* headers and `policy` maps each lower-cased policy
    name to its value with all spaces removed.
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)
    policy = {}
    policy_headers = get_header_prefix(request, 'X-Container-Policy-')
    for header, value in policy_headers.iteritems():
        # Drop the 19 characters of 'X-Container-Policy-'.
        policy[header[19:].lower()] = value.replace(' ', '')
    return meta, policy
199

    
200

    
201
def put_container_headers(request, response, meta, policy):
    """Fill `response` with the headers describing a container.

    Writes the object count and bytes used (when present in `meta`),
    Last-Modified, every X-Container-Meta-* entry, the names of the object
    metadata keys in use, the backend block size and hash algorithm, the
    optional until-timestamp and one X-Container-Policy-* header per
    policy item.
    """
    for key, header in (('count', 'X-Container-Object-Count'),
                        ('bytes', 'X-Container-Bytes-Used')):
        if key in meta:
            response[header] = meta[key]
    response['Last-Modified'] = http_date(int(meta['modified']))
    for name in meta.keys():
        if name.startswith('X-Container-Meta-'):
            response[smart_str(name, strings_only=True)] = smart_str(
                meta[name], strings_only=True)
    # Keep only the part after the 14 characters of 'X-Object-Meta-'.
    object_meta_names = [
        smart_str(x, strings_only=True)[14:]
        for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(object_meta_names)
    backend = request.backend
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm
    if 'until_timestamp' in meta:
        until = int(meta['until_timestamp'])
        response['X-Container-Until-Timestamp'] = http_date(until)
    for name, value in policy.iteritems():
        header = format_header_key('X-Container-Policy-' + name)
        response[smart_str(header, strings_only=True)] = smart_str(
            value, strings_only=True)
220

    
221

    
222
def get_object_headers(request):
    """Read the object-related headers of the request.

    Returns a (content_type, meta, permissions, public) tuple. `meta`
    holds the validated X-Object-Meta-* headers plus any non-empty
    Content-Encoding, Content-Disposition and X-Object-Manifest;
    `permissions` and `public` come from get_sharing() and get_public().
    """
    env = request.META
    content_type = env.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)
    extra_headers = (
        ('Content-Encoding', 'HTTP_CONTENT_ENCODING'),
        ('Content-Disposition', 'HTTP_CONTENT_DISPOSITION'),
        ('X-Object-Manifest', 'HTTP_X_OBJECT_MANIFEST'),
    )
    for header, env_key in extra_headers:
        value = env.get(env_key)
        if value:
            meta[header] = value
    permissions = get_sharing(request)
    public = get_public(request)
    return content_type, meta, permissions, public
233

    
234

    
235
def put_object_headers(response, meta, restricted=False):
    """Fill `response` with the headers describing an object.

    ETag, Content-Length, Content-Type and Last-Modified are always set.
    With `restricted` only Content-Encoding and Content-Disposition are
    added on top of those; otherwise the hash, UUID, modifier, version
    information, X-Object-Meta-* entries and the sharing/public related
    headers found in `meta` are added as well.
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if restricted:
        passthrough = ('Content-Encoding', 'Content-Disposition')
    else:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(
            retrieve_username(meta['modified_by']), strings_only=True)
        response['X-Object-Version'] = meta['version']
        version_timestamp = int(meta['version_timestamp'])
        response['X-Object-Version-Timestamp'] = http_date(version_timestamp)
        for name in meta.keys():
            if name.startswith('X-Object-Meta-'):
                response[smart_str(name, strings_only=True)] = smart_str(
                    meta[name], strings_only=True)
        passthrough = (
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
            'X-Object-Public')
    # Copy over whichever of the selected headers the metadata carries.
    for name in passthrough:
        if name in meta:
            response[name] = smart_str(meta[name], strings_only=True)
262

    
263

    
264
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The manifest value is split into a container and a name prefix; the
    objects listed under that prefix contribute their sizes to
    meta['bytes'] and their checksums, concatenated and hashed with MD5,
    to meta['checksum'].

    This is best effort: if anything fails while collecting the parts,
    `meta` is left untouched.
    """

    if 'X-Object-Manifest' not in meta:
        return

    checksums = []
    size = 0
    try:
        src_container, src_name = split_container_object_string(
            '/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(
            request.user_uniq, v_account,
            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(
                request.user_uniq, v_account, src_container,
                x[0], 'pithos', x[1])
            checksums.append(src_meta['checksum'])
            size += src_meta['bytes']
        etag = ''.join(checksums)
    except Exception:
        # Ignore errors, but unlike a bare except let SystemExit and
        # KeyboardInterrupt propagate.
        return
    meta['bytes'] = size
    md5 = hashlib.md5()
    md5.update(etag)
    meta['checksum'] = md5.hexdigest().lower()
288

    
289
def is_uuid(str):
    """Return True if `str` can be parsed by uuid.UUID, False otherwise."""
    try:
        uuid.UUID(str)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed text. TypeError/AttributeError: the value
        # is not a string at all (e.g. None or a number), hence not a UUID.
        return False
    return True
296

    
297
def retrieve_username(uuid):
    """Return the username that get_username() reports for `uuid`.

    If the lookup fails for any reason, `uuid` itself is returned so the
    caller's data stays intact.
    """
    try:
        return get_username(
            SERVICE_TOKEN, uuid, USER_INFO_URL, AUTHENTICATION_USERS)
    except Exception:
        # if it fails just leave the metadata intact; SystemExit and
        # KeyboardInterrupt are deliberately not swallowed.
        return uuid
304

    
305
def retrieve_uuid(username):
    """Return the UUID for `username`.

    A value that already is a UUID (per is_uuid()) is returned as is;
    anything else is looked up with get_user_uuid().

    Raises ItemNotExists(username) when the lookup fails with an exception
    whose last argument is 404; any other failure is re-raised unchanged.
    """
    if is_uuid(username):
        return username

    try:
        return get_user_uuid(
            SERVICE_TOKEN, username, USER_INFO_URL, AUTHENTICATION_USERS)
    except Exception as e:
        not_found = bool(e.args) and e.args[-1] == 404
        if not_found:
            raise ItemNotExists(username)
        raise
318

    
319
def replace_permissions_username(holder):
    """Replace the account part of a permission holder with its UUID.

    `holder` is either a plain account or an 'account:group' pair; in the
    latter case only the account part goes through retrieve_uuid().
    """
    parts = holder.split(':')
    if len(parts) != 2:
        # not a group permission
        return retrieve_uuid(holder)
    account, group = parts
    return ':'.join([retrieve_uuid(account), group])
327

    
328
def replace_permissions_uuid(holder):
    """Replace the account part of a permission holder with its username.

    `holder` is either a plain account or an 'account:group' pair; in the
    latter case only the account part goes through retrieve_username().
    """
    parts = holder.split(':')
    if len(parts) != 2:
        # not a group permission
        return retrieve_username(holder)
    account, group = parts
    return ':'.join([retrieve_username(account), group])
336

    
337
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add the sharing-related entries of an object to `meta`.

    `permissions` is None or an (allowed, perm_path, perms) tuple. Nothing
    happens when it is None or `perms` is empty. Otherwise the holders in
    perms['read'] and perms['write'] are rewritten in place through
    replace_permissions_uuid() and `meta` receives:

    - 'X-Object-Sharing': the 'read=...; write=...' summary;
    - 'X-Object-Shared-By': `perm_path`, if it is not this object's path;
    - 'X-Object-Allowed-To': `allowed`, if the requesting user is not
      `v_account`.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if not len(perms):
        return

    # Both keys are always (re)written on perms, even when absent before.
    for access in ('read', 'write'):
        perms[access] = [
            replace_permissions_uuid(x) for x in perms.get(access, [])]

    parts = []
    for access in ('read', 'write'):
        holders = ','.join(perms.get(access, []))
        if holders:
            parts.append(access + '=' + holders)
    meta['X-Object-Sharing'] = '; '.join(parts)

    object_path = '/'.join((v_account, v_container, v_object))
    if object_path != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
361

    
362

    
363
def update_public_meta(public, meta):
    """Set meta['X-Object-Public'] to the public path of the object.

    The path is '/public/' followed by encode_url(public). Nothing is set
    when `public` is falsy.
    """
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
367

    
368

    
369
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.

    Raises NotModified when If-Modified-Since is not older than
    meta['modified'], and PreconditionFailed when If-Unmodified-Since is
    older than it. Headers that parse_http_date_safe() maps to None are
    ignored.
    """

    if 'modified' not in meta:
        return  # TODO: Always return?

    modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if modified_since is not None:
        modified_since = parse_http_date_safe(modified_since)
        if (modified_since is not None and
                int(meta['modified']) <= modified_since):
            raise NotModified('Resource has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        unmodified_since = parse_http_date_safe(unmodified_since)
        if (unmodified_since is not None and
                int(meta['modified']) > unmodified_since):
            raise PreconditionFailed('Resource has been modified')
386

    
387

    
388
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    An empty meta['checksum'] is treated as "resource does not exist".

    Raises PreconditionFailed when If-Match is present and the resource
    does not exist or its ETag is not listed. When If-None-Match matches
    ('*' or a listed ETag) raises NotModified for HEAD/GET requests and
    PreconditionFailed for every other method.
    """

    etag = meta['checksum'] or None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            accepted = [x.lower() for x in parse_etags(if_match)]
            if etag not in accepted:
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    # TODO: If this passes, must ignore If-Modified-Since header.
    if if_none_match is None or etag is None:
        return
    if (if_none_match == '*' or
            etag in [x.lower() for x in parse_etags(if_none_match)]):
        # TODO: Continue if an If-Modified-Since header is present.
        if request.method in ('HEAD', 'GET'):
            raise NotModified('Resource ETag matches')
        raise PreconditionFailed('Resource exists or ETag matches')
411

    
412

    
413
def split_container_object_string(s):
    """Split '/container/object' into a (container, object) tuple.

    Only the first '/' after the leading one separates the two parts, so
    the object name may itself contain slashes.

    Raises ValueError if `s` does not start with '/', has no second '/',
    or has nothing after it.
    """
    if not (len(s) > 0 and s[0] == '/'):
        raise ValueError
    container, separator, name = s[1:].partition('/')
    if not separator or not name:
        raise ValueError
    return container, name
421

    
422

    
423
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False, delimiter=None):
    """Copy or move an object.

    Content type, metadata, permissions and the public flag are taken from
    the request headers (the content type is dropped first when the
    'ignore_content_type' query parameter is given). For copies the source
    version comes from the X-Source-Version header.

    Returns the version id reported by the backend.

    Raises Forbidden, ItemNotFound, BadRequest or RequestEntityTooLarge
    for the corresponding backend errors.
    """

    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del(request.META['CONTENT_TYPE'])
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')

    # Leading positional arguments shared by move_object and copy_object.
    common = (request.user_uniq, src_account, src_container, src_name,
              dest_account, dest_container, dest_name,
              content_type, 'pithos', meta, False, permissions)
    try:
        if move:
            version_id = request.backend.move_object(
                *(common + (delimiter,)))
        else:
            version_id = request.backend.copy_object(
                *(common + (src_version, delimiter)))
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (ItemNotExists, VersionNotExists):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')

    if public is None:
        return version_id
    try:
        request.backend.update_object_public(
            request.user_uniq, dest_account, dest_container, dest_name,
            public)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except ItemNotExists:
        raise ItemNotFound('Object does not exist')
    return version_id
457

    
458

    
459
def get_int_parameter(p):
    """Convert `p` to a non-negative integer.

    Returns None when `p` is None, cannot be parsed (ValueError) or is
    negative.
    """
    if p is None:
        return p
    try:
        value = int(p)
    except ValueError:
        return None
    return value if value >= 0 else None
468

    
469

    
470
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises LengthRequired when the header is missing or is not accepted by
    get_int_parameter().
    """
    raw_length = request.META.get('CONTENT_LENGTH')
    content_length = get_int_parameter(raw_length)
    if content_length is not None:
        return content_length
    raise LengthRequired('Missing or invalid Content-Length header')
475

    
476

    
477
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    `size` is the total size the ranges refer to; it is used to resolve
    open-ended ('N-') and suffix ('-N') ranges. The resulting tuples are
    not validated against `size`.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compiled once for all range specs instead of once per iteration.
    spec = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')

    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        m = spec.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                ret.append((offset, upto - offset + 1))
            else:
                # 'N-': everything from offset N to the end.
                ret.append((offset, size - offset))
        else:
            # '-N': the last N bytes.
            length = int(upto)
            ret.append((size - length, length))

    return ret
513

    
514

    
515
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None

    m = re.match(
        r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', ranges)
    if m is None:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None

    offset = int(m.group('offset'))
    upto = m.group('upto')
    upto = int(upto) if upto != '' else None
    total = m.group('total')
    total = int(total) if total != '*' else None

    # Reject ranges that are inverted or reach beyond the declared total.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
554

    
555

    
556
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Returns None when the header is absent and an empty dict when it is
    blank. Otherwise the result maps 'read' and 'write' to lists of
    permission holders (either list may be empty), with every holder
    passed through replace_permissions_username(). A holder other than
    '*' that appears in both lists is kept only under 'write'; the 'read'
    key is dropped if that leaves it empty.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '').replace(' ', '')

    ret = {}
    if permissions == '':
        return ret
    for perm in permissions.split(';'):
        # Find which access type this ';'-separated part declares.
        for access in ('read', 'write'):
            if perm.startswith(access + '='):
                break
        else:
            raise BadRequest(
                'Bad X-Object-Sharing header value: missing prefix')
        holders = list(set(
            [v.replace(' ', '').lower()
             for v in perm[len(access) + 1:].split(',')]))
        if '' in holders:
            holders.remove('')
        if '*' in holders:
            holders = ['*']
        if len(holders) == 0:
            raise BadRequest(
                'Bad X-Object-Sharing header value: invalid length')
        ret[access] = holders

    # replace username with uuid
    try:
        for access in ('read', 'write'):
            ret[access] = [
                replace_permissions_username(x) for x in ret.get(access, [])]
    except ItemNotExists as e:
        raise BadRequest(
            'Bad X-Object-Sharing header value: unknown account: %s' % e)

    # Keep duplicates only in write list.
    writers = ret.get('write', [])
    dups = [x for x in ret.get('read', []) if x in writers and x != '*']
    if dups:
        ret['read'] = [x for x in ret['read'] if x not in dups]
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret
618

    
619

    
620
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None when the header is absent, True for 'true' and False for
    'false' or an empty value (spaces and letter case are ignored).

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    flag = public.replace(' ', '').lower()
    if flag == 'true':
        return True
    if flag in ('false', ''):
        return False
    raise BadRequest('Bad X-Object-Public header value')
636

    
637

    
638
def raw_input_socket(request):
    """Return the socket for reading the rest of the request.

    Under mod_python this is request._req; otherwise it is the
    'wsgi.input' entry of request.environ. Raises NotImplemented when
    neither applies.
    """

    server_software = request.META.get('SERVER_SOFTWARE')
    under_mod_python = bool(
        server_software and server_software.startswith('mod_python'))
    if under_mod_python:
        return request._req
    try:
        return request.environ['wsgi.input']
    except KeyError:
        raise NotImplemented('Unknown server software')
647

    
648
# Upper bound, in bytes, on the amount of data socket_read_iterator() reads.
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB
649

    
650

    
651
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Raises BadRequest when the size limit is exceeded, when a chunk size
    line is malformed, or when the input ends before the announced data
    has been received.
    """

    sock = raw_input_socket(request)
    if length < 0:  # Chunked transfers
        # A negative 'length' only selects chunked mode; the amount of data
        # received is counted separately so the limit can be enforced.
        total = 0
        server_software = request.META.get('SERVER_SOFTWARE') or ''

        # Small version (server does the dechunking).
        if (request.environ.get('mod_wsgi.input_chunked', None) or
                server_software.startswith('gunicorn')):
            while True:
                data = sock.read(blocksize)
                if not data:
                    return
                total += len(data)
                if total > MAX_UPLOAD_SIZE:
                    raise BadRequest('Maximum size is reached')
                yield data

        # Long version (do the dechunking).
        data = ''
        while True:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    char = sock.read(1)
                    if not char:
                        # Premature end of input: stop instead of spinning;
                        # the checks below reject the truncated request.
                        break
                    chunk_length += char
            # Drop any chunk extension following the size.
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                # int() tolerates the surrounding whitespace/CRLF.
                chunk_length = int(chunk_length, 16)
            except Exception:
                raise BadRequest('Bad chunk size')
                                 # TODO: Change to something more appropriate.
            if chunk_length < 0:
                raise BadRequest('Bad chunk size')
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                if not chunk:
                    # Input ended in the middle of a chunk.
                    raise BadRequest('Incomplete chunk')
                chunk_length -= len(chunk)
                total += len(chunk)
                if total > MAX_UPLOAD_SIZE:
                    raise BadRequest('Maximum size is reached')
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2)  # CRLF
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
715

    
716

    
717
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Incoming data is buffered and stored in the backend block by block.
    The resulting UploadedFile carries the block hashes in `hashmap`, the
    number of bytes received in `size` and the MD5 of the content in
    `etag`.
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store buffered data in the backend, `length` bytes per block.

        Every complete block available in the buffer is stored, so a
        received chunk larger than `length` does not leave whole blocks
        behind to end up in one oversized final block.
        """
        while length > 0 and len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the buffer, digest and UploadedFile for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(
            name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer `raw_data` and store the blocks that became complete."""
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.request.backend.block_size)
        return None

    def file_complete(self, file_size):
        """Store the remaining data as the last block and return the file."""
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
751

    
752

    
753
class ObjectWrapper(object):
754
    """Return the object's data block-per-block in each iteration.
755

756
    Read from the object using the offset and length provided in each entry of the range list.
757
    """
758

    
759
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
760
        self.backend = backend
761
        self.ranges = ranges
762
        self.sizes = sizes
763
        self.hashmaps = hashmaps
764
        self.boundary = boundary
765
        self.size = sum(self.sizes)
766

    
767
        self.file_index = 0
768
        self.block_index = 0
769
        self.block_hash = -1
770
        self.block = ''
771

    
772
        self.range_index = -1
773
        self.offset, self.length = self.ranges[0]
774

    
775
    def __iter__(self):
776
        return self
777

    
778
    def part_iterator(self):
779
        if self.length > 0:
780
            # Get the file for the current offset.
781
            file_size = self.sizes[self.file_index]
782
            while self.offset >= file_size:
783
                self.offset -= file_size
784
                self.file_index += 1
785
                file_size = self.sizes[self.file_index]
786

    
787
            # Get the block for the current position.
788
            self.block_index = int(self.offset / self.backend.block_size)
789
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
790
                self.block_hash = self.hashmaps[
791
                    self.file_index][self.block_index]
792
                try:
793
                    self.block = self.backend.get_block(self.block_hash)
794
                except ItemNotExists:
795
                    raise ItemNotFound('Block does not exist')
796

    
797
            # Get the data from the block.
798
            bo = self.offset % self.backend.block_size
799
            bs = self.backend.block_size
800
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
801
                    self.sizes[self.file_index] % self.backend.block_size):
802
                bs = self.sizes[self.file_index] % self.backend.block_size
803
            bl = min(self.length, bs - bo)
804
            data = self.block[bo:bo + bl]
805
            self.offset += bl
806
            self.length -= bl
807
            return data
808
        else:
809
            raise StopIteration
810

    
811
    def next(self):
812
        if len(self.ranges) == 1:
813
            return self.part_iterator()
814
        if self.range_index == len(self.ranges):
815
            raise StopIteration
816
        try:
817
            if self.range_index == -1:
818
                raise StopIteration
819
            return self.part_iterator()
820
        except StopIteration:
821
            self.range_index += 1
822
            out = []
823
            if self.range_index < len(self.ranges):
824
                # Part header.
825
                self.offset, self.length = self.ranges[self.range_index]
826
                self.file_index = 0
827
                if self.range_index > 0:
828
                    out.append('')
829
                out.append('--' + self.boundary)
830
                out.append('Content-Range: bytes %d-%d/%d' % (
831
                    self.offset, self.offset + self.length - 1, self.size))
832
                out.append('Content-Transfer-Encoding: binary')
833
                out.append('')
834
                out.append('')
835
                return '\r\n'.join(out)
836
            else:
837
                # Footer.
838
                out.append('')
839
                out.append('--' + self.boundary + '--')
840
                out.append('')
841
                return '\r\n'.join(out)
842

    
843

    
844
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Build the HttpResponse used to reply with the object's data.

    The whole object is served with status 200 unless the request carries
    ranges, in which case the reply is a 206 holding either one range or a
    multipart/byteranges body.  An If-Range value that matches neither
    meta['modified'] nor meta['checksum'] falls back to the full object.
    Raises RangeNotSatisfiable when a range lies outside the object.
    """

    # Range handling.
    size = sum(sizes)
    status = 206
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        status = 200
    else:
        out_of_bounds = any(
            length <= 0 or length > size or
            offset < 0 or offset >= size or
            offset + length > size
            for offset, length in ranges)
        if out_of_bounds:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # Modification time has passed instead.
                unchanged = parse_http_date(if_range) == meta['modified']
            except ValueError:
                unchanged = if_range == meta['checksum']
            if not unchanged:
                ranges = [(0, size)]
                status = 200

    # A boundary is only needed for a multipart reply.
    boundary = ''
    if status == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex

    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=status)
    put_object_headers(response, meta, public)
    if status != 206:
        return response

    if len(ranges) == 1:
        offset, length = ranges[0]
        # Update with the correct length.
        response['Content-Length'] = length
        response['Content-Range'] = 'bytes %d-%d/%d' % (
            offset, offset + length - 1, size)
    else:
        del response['Content-Length']
        response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
            boundary,)
    return response
893

    
894

    
895
def put_object_block(request, hashmap, data, offset):
    """Write as much of `data` as fits in the block containing `offset`.

    An existing block is updated in place in `hashmap`; a block beyond the
    end of `hashmap` is appended, zero-filled up to the offset inside the
    block.  Returns the number of bytes of `data` that were written.
    """

    backend = request.backend
    block_size = backend.block_size
    index = int(offset / block_size)
    start = offset % block_size
    written = min(len(data), block_size - start)
    chunk = data[:written]
    if index >= len(hashmap):
        hashmap.append(backend.put_block(('\x00' * start) + chunk))
    else:
        hashmap[index] = backend.update_block(hashmap[index], chunk, start)
    return written  # Amount of data written.
906

    
907

    
908
def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    `hashmap` lists the block hashes of an object that is `size` bytes
    long.  Every block is fetched from `backend` and fed to the digest;
    the last block is cut down to the bytes that belong to the object.
    Returns the lowercase hexadecimal digest.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    tail = size % backend.block_size
    last = len(hashmap) - 1
    for index, block_hash in enumerate(hashmap):
        data = backend.get_block(block_hash)  # Blocks come in padded.
        # Trim only when the last block is partial (or the object is
        # empty).  When size is a non-zero multiple of the block size the
        # last block is full, and slicing by `size % block_size` would
        # wrongly drop all of it from the digest.
        if index == last and (tail or not size):
            data = data[:tail]
        md5.update(data)
    return md5.hexdigest().lower()
920

    
921

    
922
def simple_list_response(request, l):
    """Serialize the list `l` according to request.serialization.

    'text' gives one item per line, 'xml' renders the 'items.xml'
    template and 'json' dumps the list.  Any other value yields None.
    """
    renderers = {
        'text': lambda: '\n'.join(l) + '\n',
        'xml': lambda: render_to_string('items.xml', {'items': l}),
        'json': lambda: json.dumps(l),
    }
    render = renderers.get(request.serialization)
    if render is None:
        return None
    return render()
929

    
930

    
931
from pithos.backends.util import PithosBackendPool
932
# Passed to PithosBackendPool as the pool size.
POOL_SIZE = 5

# The RADOS pool names are only handed over when RADOS storage is enabled.
BLOCK_PARAMS = {
    'mappool': RADOS_POOL_MAPS if RADOS_STORAGE else None,
    'blockpool': RADOS_POOL_BLOCKS if RADOS_STORAGE else None,
}


# Module-wide pool from which get_backend() draws backend instances.
_pithos_backend_pool = PithosBackendPool(
    size=POOL_SIZE,
    db_module=BACKEND_DB_MODULE,
    db_connection=BACKEND_DB_CONNECTION,
    block_module=BACKEND_BLOCK_MODULE,
    block_path=BACKEND_BLOCK_PATH,
    block_umask=BACKEND_BLOCK_UMASK,
    queue_module=BACKEND_QUEUE_MODULE,
    queue_hosts=BACKEND_QUEUE_HOSTS,
    queue_exchange=BACKEND_QUEUE_EXCHANGE,
    quotaholder_url=QUOTAHOLDER_URL,
    quotaholder_token=QUOTAHOLDER_TOKEN,
    free_versioning=BACKEND_FREE_VERSIONING,
    block_params=BLOCK_PARAMS,
)
956

    
957
def get_backend():
    """Take a backend from the pool and prepare it for a request.

    The default quota and versioning policy are set from the configured
    values and the message list is reset before the backend is returned.
    """
    pooled = _pithos_backend_pool.pool_get()
    policy = pooled.default_policy
    policy['quota'] = BACKEND_QUOTA
    policy['versioning'] = BACKEND_VERSIONING
    pooled.messages = []
    return pooled
963

    
964

    
965
def update_request_headers(request):
    """Validate the HTTP_* entries of request.META and URL-decode them.

    Raises BadRequest when a header name or value is not ASCII.  Entries
    containing a '%' are replaced by their unquoted form.
    """
    # Work on a snapshot, since request.META is modified below.
    http_headers = dict(
        (name, value) for name, value in request.META.iteritems()
        if name.startswith('HTTP_'))
    for name, value in http_headers.iteritems():
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' not in name and '%' not in value:
            continue
        # Handle URL-encoded keys and values.
        del request.META[name]
        request.META[unquote(name)] = smart_unicode(
            unquote(value), strings_only=True)
979

    
980

    
981
def update_response_headers(request, response):
    """Fill in the Content-Type / Content-Length headers and quote headers.

    The content type follows request.serialization, defaulting to plain
    text when the response has none.  Content-Length is derived from the
    response content unless it is already present or the response is a
    multipart/byteranges one.
    """
    serialized_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    content_type = serialized_types.get(request.serialization)
    if content_type is not None:
        response['Content-Type'] = content_type
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if not response.has_header('Content-Length'):
        multipart = (
            response.has_header('Content-Type') and
            response['Content-Type'].startswith('multipart/byteranges'))
        if not multipart:
            response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    quoted_prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    for name, value in response.items():
        if not name.startswith(quoted_prefixes):
            continue
        del response[name]
        response[quote(name)] = quote(value, safe='/=,:@; ')
1001

    
1002

    
1003
def render_fault(request, fault):
    """Render `fault` as a plain-text HttpResponse.

    The body is the fault message followed, when present, by the fault
    details; the status is the fault code.  For an InternalServerError in
    DEBUG mode the details are replaced by the traceback of the exception
    currently being handled.  request.serialization is forced to 'text'.
    """
    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # format_exc() formats the exception being handled; its only
        # argument is a traceback depth limit, so the fault must not be
        # passed to it.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
1014

    
1015

    
1016
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    The result is always 'text' unless 'format_allowed' is True, in which
    case the 'format' query parameter and then the Accept header may select
    'json' or 'xml'.
    """
    if not format_allowed:
        return 'text'

    # An explicit ?format= parameter takes precedence over Accept.
    by_parameter = {'json': 'json', 'xml': 'xml'}
    chosen = by_parameter.get(request.GET.get('format'))
    if chosen is not None:
        return chosen

    by_media_type = {
        'application/json': 'json',
        'application/xml': 'xml',
        'text/xml': 'xml',
    }
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        # Drop any parameters (e.g. ";q=0.9") after the media type.
        media_type = item.strip().partition(';')[0]
        if media_type in by_media_type:
            return by_media_type[media_type]

    return 'text'
1039

    
1040
class User(unicode):
    """Unicode string that can carry extra attributes.

    api_method wraps request.user_uniq in this class so that a `uuid`
    attribute can be attached to it.
    """
    pass
1042

    
1043
def get_pithos_usage(usage):
    """Return the first entry of `usage` named 'pithos+.diskspace'.

    None is returned when there is no such entry.
    """
    diskspace_entries = (
        entry for entry in usage
        if entry.get('name') == 'pithos+.diskspace')
    return next(diskspace_entries, None)
1047

    
1048
def api_method(http_method=None, format_allowed=False, user_required=True,
               request_usage=False):
    """Decorator function for views that implement an API method.

    The wrapped view is called with `request.serialization` and
    `request.backend` filled in; the backend is closed once the request
    has been handled.  Faults and unexpected errors are rendered through
    render_fault() instead of propagating.

    http_method: when set, any other request method is answered with
        BadRequest('Method not allowed.').
    format_allowed: passed on to request_serialization().
    user_required: when true the request is authenticated via get_user()
        and `request.user_uniq` / `request.user_usage` are populated.
    request_usage: accepted for callers; not read by this decorator.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                if user_required:
                    token = None
                    if (request.method in ('HEAD', 'GET') and
                            COOKIE_NAME in request.COOKIES):
                        # The cookie value has the form "<account>|<token>".
                        cookie_value = unquote(
                            request.COOKIES.get(COOKIE_NAME, ''))
                        token = cookie_value.partition('|')[2]
                    get_user(request,
                             AUTHENTICATION_URL,
                             AUTHENTICATION_USERS,
                             token,
                             user_required)
                    if getattr(request, 'user', None) is None:
                        raise Unauthorized('Access denied')
                    # Explicit check rather than an assert: asserts are
                    # stripped under -O, and User(None) would then silently
                    # become the identifier u'None'.
                    if getattr(request, 'user_uniq', None) is None:
                        raise InternalServerError(
                            'Unexpected error: missing user identifier')
                    request.user_uniq = User(request.user_uniq)
                    request.user_uniq.uuid = request.user.get('uuid')
                    request.user_usage = get_pithos_usage(
                        request.user.get('usage', []))

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(
                    request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                if fault.code >= 500:
                    logger.exception("API Fault")
                return render_fault(request, fault)
            # NOTE(review): BaseException also covers SystemExit and
            # KeyboardInterrupt; kept as in the original because narrowing
            # it would change which errors reach the server.
            except BaseException as e:
                logger.exception('Unexpected error: %s', e)
                fault = InternalServerError('Unexpected error: %s' % e)
                return render_fault(request, fault)
            finally:
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator