Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 56f3c759

History | View | Annotate | Download (41 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52
from synnefo.lib.astakos import get_user
53

    
54
from pithos.api.faults import (
55
    Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
56
    Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
57
    RangeNotSatisfiable, InternalServerError, NotImplemented)
58
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
59
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
60
                                 BACKEND_BLOCK_UMASK,
61
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
62
                                 BACKEND_QUEUE_EXCHANGE, USE_QUOTAHOLDER,
63
                                 QUOTAHOLDER_URL, QUOTAHOLDER_TOKEN,
64
                                 QUOTAHOLDER_POOLSIZE,
65
                                 BACKEND_QUOTA, BACKEND_VERSIONING,
66
                                 BACKEND_FREE_VERSIONING,
67
                                 AUTHENTICATION_URL, AUTHENTICATION_USERS,
68
                                 COOKIE_NAME, USER_CATALOG_URL,
69
                                 RADOS_STORAGE, RADOS_POOL_BLOCKS,
70
                                 RADOS_POOL_MAPS, TRANSLATE_UUIDS,
71
                                 PUBLIC_URL_MIN_LENGTH,
72
                                 PUBLIC_URL_ALPHABET)
73
from pithos.backends import connect_backend
74
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
75
                                  VersionNotExists)
76
from synnefo.lib.astakos import (get_user_uuid, get_displayname,
77
                                 get_uuids, get_displaynames)
78

    
79
import logging
80
import re
81
import hashlib
82
import uuid
83
import decimal
84

    
85
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
86

    
87

    
88
class UTC(tzinfo):
    """A tzinfo implementation for UTC: zero offset, no daylight saving."""

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return timedelta()

    def tzname(self, dt):
        """Return the zone name."""
        return 'UTC'

    def dst(self, dt):
        """Return the daylight-saving adjustment, which is always zero."""
        return timedelta()
97

    
98

    
99
def json_encode_decimal(obj):
    """Serialize a decimal.Decimal as its string form.

    Raises TypeError for any other type, using the message format that
    JSON encoders use for unsupported objects.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
103

    
104

    
105
def isoformat(d):
    """Return an ISO8601 date string that includes a timezone.

    The datetime is stamped with the UTC tzinfo as-is; its fields are not
    converted, so the caller must pass a value already expressed in UTC.
    """
    stamped = d.replace(tzinfo=UTC())
    return stamped.isoformat()
109

    
110

    
111
def rename_meta_key(d, old, new):
    """Rename key *old* to *new* in dictionary *d*, in place.

    Does nothing when *old* is missing. An existing value under *new* is
    overwritten. Renaming a key to itself keeps the entry (copying and then
    deleting would drop it).
    """
    if old in d:
        # pop() first so that old == new does not end up deleting the value.
        d[new] = d.pop(old)
116

    
117

    
118
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format a truthy 'last_modified' timestamp as an ISO8601 UTC string.

    Note that 'last_modified' is rewritten in place in *d*; the returned
    dictionary is a new object.
    """
    if d.get('last_modified'):
        # isoformat() labels the value as UTC without converting it, so the
        # timestamp must be broken down in UTC, not in the server's local zone.
        d['last_modified'] = isoformat(
            datetime.utcfromtimestamp(d['last_modified']))
    return dict((k.lower().replace('-', '_'), v) for k, v in d.items())
129

    
130

    
131
def format_header_key(k):
    """Turn underscores into dashes and capitalize each dash-separated word."""
    words = k.replace('_', '-').split('-')
    return '-'.join(word.capitalize() for word in words)
134

    
135

    
136
def get_header_prefix(request, prefix):
    """Collect the request headers that start with *prefix* into a dict.

    Headers are looked up in request.META under their 'HTTP_' names. A key
    that equals the prefix exactly (nothing after it) is skipped. Result keys
    are reformatted with format_header_key().
    """
    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for key, value in request.META.items():
        if not key.startswith(meta_prefix) or len(key) <= len(meta_prefix):
            continue
        # key[5:] drops the leading 'HTTP_'.
        # TODO: Document or remove '~' replacing.
        headers[format_header_key(key[5:])] = value.replace('~', '')
    return headers
142

    
143

    
144
def check_meta_headers(meta):
    """Enforce size limits on a dictionary of metadata headers.

    Raises BadRequest when there are more than 90 entries, when a name is
    longer than 128 characters or when a value is longer than 256 characters.
    """
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for name, value in meta.items():
        if len(name) > 128:
            raise BadRequest('Header name too large.')
        if len(value) > 256:
            raise BadRequest('Header value too large.')
152

    
153

    
154
def get_account_headers(request):
    """Extract account metadata and group definitions from the request.

    Returns a (meta, groups) tuple. `meta` holds the X-Account-Meta-* headers
    (size-checked); `groups` maps each lower-cased group name to its list of
    non-empty, comma-separated members. Raises BadRequest when a group name
    contains a dash or an underscore.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)

    group_prefix = 'X-Account-Group-'
    groups = {}
    for header, value in get_header_prefix(request, group_prefix).items():
        name = header[len(group_prefix):].lower()
        if '-' in name or '_' in name:
            raise BadRequest('Bad characters in group name')
        members = value.replace(' ', '').split(',')
        groups[name] = [member for member in members if member != '']
    return meta, groups
166

    
167

    
168
def put_account_headers(response, meta, groups, policy):
    """Write account information into the response headers.

    Sets the container count and bytes used (when present in *meta*),
    Last-Modified, every X-Account-Meta-* entry, the optional until
    timestamp, one X-Account-Group-* header per group and one
    X-Account-Policy-* header per policy entry.
    """
    counters = (('count', 'X-Account-Container-Count'),
                ('bytes', 'X-Account-Bytes-Used'))
    for meta_key, header in counters:
        if meta_key in meta:
            response[header] = meta[meta_key]

    response['Last-Modified'] = http_date(int(meta['modified']))

    user_meta_keys = [key for key in meta.keys()
                      if key.startswith('X-Account-Meta-')]
    for key in user_meta_keys:
        header = smart_str(key, strings_only=True)
        response[header] = smart_str(meta[key], strings_only=True)

    if 'until_timestamp' in meta:
        until = int(meta['until_timestamp'])
        response['X-Account-Until-Timestamp'] = http_date(until)

    for name, members in groups.items():
        name = smart_str(name, strings_only=True)
        header = format_header_key('X-Account-Group-' + name)
        response[header] = smart_str(','.join(members), strings_only=True)

    for name, value in policy.items():
        header = format_header_key('X-Account-Policy-' + name)
        response[smart_str(header, strings_only=True)] = smart_str(
            value, strings_only=True)
187

    
188

    
189
def get_container_headers(request):
    """Extract container metadata and policy from the request.

    Returns a (meta, policy) tuple. `meta` holds the size-checked
    X-Container-Meta-* headers; `policy` maps each lower-cased policy name
    (the part after 'X-Container-Policy-') to its value with spaces removed.
    """
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)

    policy_prefix = 'X-Container-Policy-'
    policy = {}
    for header, value in get_header_prefix(request, policy_prefix).items():
        policy[header[len(policy_prefix):].lower()] = value.replace(' ', '')
    return meta, policy
194

    
195

    
196
def put_container_headers(request, response, meta, policy):
    """Write container information into the response headers.

    Sets the object count and bytes used (when present in *meta*),
    Last-Modified, every X-Container-Meta-* entry, the comma-separated list
    of object metadata key suffixes found in meta['object_meta'], the
    backend's block size and hash algorithm, the optional until timestamp
    and one X-Container-Policy-* header per policy entry.
    """
    counters = (('count', 'X-Container-Object-Count'),
                ('bytes', 'X-Container-Bytes-Used'))
    for meta_key, header in counters:
        if meta_key in meta:
            response[header] = meta[meta_key]

    response['Last-Modified'] = http_date(int(meta['modified']))

    user_meta_keys = [key for key in meta.keys()
                      if key.startswith('X-Container-Meta-')]
    for key in user_meta_keys:
        header = smart_str(key, strings_only=True)
        response[header] = smart_str(meta[key], strings_only=True)

    object_prefix = 'X-Object-Meta-'
    object_keys = [smart_str(key, strings_only=True)
                   for key in meta['object_meta']
                   if key.startswith(object_prefix)]
    # Only the part after 'X-Object-Meta-' is advertised.
    response['X-Container-Object-Meta'] = ','.join(
        [key[len(object_prefix):] for key in object_keys])

    backend = request.backend
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm

    if 'until_timestamp' in meta:
        until = int(meta['until_timestamp'])
        response['X-Container-Until-Timestamp'] = http_date(until)

    for name, value in policy.items():
        header = format_header_key('X-Container-Policy-' + name)
        response[smart_str(header, strings_only=True)] = smart_str(
            value, strings_only=True)
215

    
216

    
217
def get_object_headers(request):
    """Extract object-related headers from the request.

    Returns a (content_type, meta, permissions, public) tuple, where `meta`
    holds the size-checked X-Object-Meta-* headers plus any non-empty
    Content-Encoding, Content-Disposition and X-Object-Manifest values, and
    the last two items come from get_sharing() and get_public().
    """
    content_type = request.META.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)

    # (request.META key, name under which the value is stored in meta)
    passthrough = (
        ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
        ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'),
    )
    for meta_key, name in passthrough:
        value = request.META.get(meta_key)
        if value:
            meta[name] = value

    permissions = get_sharing(request)
    public = get_public(request)
    return content_type, meta, permissions, public
228

    
229

    
230
def put_object_headers(response, meta, restricted=False, token=None):
    """Write object information into the response headers.

    ETag, Content-Length, Content-Type and Last-Modified are always set.
    When *restricted* is true only Content-Encoding and Content-Disposition
    are added; otherwise the full set of X-Object-* headers is emitted.

    When TRANSLATE_UUIDS is enabled (and not restricted), meta['modified_by']
    is replaced in place with the display name resolved using *token*.
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))

    if restricted:
        optional = ('Content-Encoding', 'Content-Disposition')
    else:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        if TRANSLATE_UUIDS:
            meta['modified_by'] = retrieve_displayname(
                token, meta['modified_by'])
        response['X-Object-Modified-By'] = smart_str(
            meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(
            int(meta['version_timestamp']))

        user_meta_keys = [key for key in meta.keys()
                          if key.startswith('X-Object-Meta-')]
        for key in user_meta_keys:
            header = smart_str(key, strings_only=True)
            response[header] = smart_str(meta[key], strings_only=True)

        optional = (
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
            'X-Object-Public')

    # Copy whichever of the optional headers the metadata provides.
    for header in optional:
        if header in meta:
            response[header] = smart_str(meta[header], strings_only=True)
258

    
259

    
260
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    The manifest value is read as '<container>/<name prefix>'. The objects
    listed under that prefix are looked up; meta['bytes'] becomes the sum of
    their sizes and meta['checksum'] the MD5 hex digest of their concatenated
    checksums.

    This is best effort: if anything fails while resolving the manifest,
    *meta* is left untouched.
    """
    if 'X-Object-Manifest' not in meta:
        return

    etag = ''
    size = 0
    try:
        src_container, src_name = split_container_object_string(
            '/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(
            request.user_uniq, v_account,
            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(
                request.user_uniq, v_account, src_container,
                x[0], 'pithos', x[1])
            etag += src_meta['checksum']
            size += src_meta['bytes']
    except Exception:
        # Ignore errors, but let SystemExit/KeyboardInterrupt propagate
        # (a bare except would swallow those as well).
        return

    meta['bytes'] = size
    md5 = hashlib.md5()
    md5.update(etag)
    meta['checksum'] = md5.hexdigest().lower()
284

    
285
def is_uuid(str):
    """Tell whether *str* parses as a UUID.

    None is never a UUID. Only ValueError from the parser is treated as
    "not a UUID"; other exceptions propagate.
    """
    if str is None:
        return False
    try:
        uuid.UUID(str)
        return True
    except ValueError:
        return False
294

    
295
##########################
296
# USER CATALOG utilities #
297
##########################
298

    
299
def retrieve_displayname(token, uuid, fail_silently=True):
    """Look up the display name for *uuid* in the user catalog.

    When no display name is found, return *uuid* itself if *fail_silently*
    is true, otherwise raise ItemNotExists.
    """
    displayname = get_displayname(
        token, uuid, USER_CATALOG_URL, AUTHENTICATION_USERS)
    if displayname:
        return displayname
    if fail_silently:
        # just return the uuid
        return uuid
    raise ItemNotExists(uuid)
308

    
309
def retrieve_displaynames(token, uuids, return_dict=False, fail_silently=True):
    """Look up the display names of several uuids in the user catalog.

    Returns the catalog mapping when *return_dict* is true, otherwise a list
    aligned with *uuids* (None for entries missing from the catalog). Raises
    ItemNotExists for missing entries unless *fail_silently* is true.
    """
    catalog = get_displaynames(
        token, uuids, USER_CATALOG_URL, AUTHENTICATION_USERS)
    if not catalog:
        catalog = {}

    missing = list(set(uuids) - set(catalog))
    if missing and not fail_silently:
        raise ItemNotExists('Unknown displaynames: %s' % ', '.join(missing))

    if return_dict:
        return catalog
    return [catalog.get(i) for i in uuids]
316

    
317
def retrieve_uuid(token, displayname):
    """Resolve *displayname* to a uuid through the user catalog.

    A value that already parses as a UUID is returned unchanged. Raises
    ItemNotExists when the catalog lookup yields nothing.
    """
    if is_uuid(displayname):
        return displayname

    user_uuid = get_user_uuid(
        token, displayname, USER_CATALOG_URL, AUTHENTICATION_USERS)
    if user_uuid:
        return user_uuid
    raise ItemNotExists(displayname)
326

    
327
def retrieve_uuids(token, displaynames, return_dict=False, fail_silently=True):
    """Look up the uuids of several display names in the user catalog.

    Returns the catalog mapping when *return_dict* is true, otherwise a list
    aligned with *displaynames* (None for entries missing from the catalog).
    Raises ItemNotExists for missing entries unless *fail_silently* is true.
    """
    catalog = get_uuids(
        token, displaynames, USER_CATALOG_URL, AUTHENTICATION_USERS)
    if not catalog:
        catalog = {}

    missing = list(set(displaynames) - set(catalog))
    if missing and not fail_silently:
        raise ItemNotExists('Unknown uuids: %s' % ', '.join(missing))

    if return_dict:
        return catalog
    return [catalog.get(i) for i in displaynames]
334

    
335
def replace_permissions_displayname(token, holder):
    """Replace the display name in a permission holder with its uuid.

    '*' is returned unchanged. A holder of the form 'account:group' has only
    its account part translated; anything else is translated as a whole.
    """
    if holder == '*':
        return holder
    # check first for a group permission
    account, colon, group = holder.partition(':')
    if not colon:
        return retrieve_uuid(token, holder)
    return ':'.join([retrieve_uuid(token, account), group])
345

    
346
def replace_permissions_uuid(token, holder):
    """Replace the uuid in a permission holder with its display name.

    '*' is returned unchanged. A holder of the form 'account:group' has only
    its account part translated; anything else is translated as a whole.
    """
    if holder == '*':
        return holder
    # check first for a group permission
    account, colon, group = holder.partition(':')
    if not colon:
        return retrieve_displayname(token, holder)
    return ':'.join([retrieve_displayname(token, account), group])
356

    
357
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add the sharing-related entries to an object's metadata.

    *permissions* is either None or an (allowed, perm_path, perms) tuple;
    nothing happens when it is None or when `perms` is empty. Otherwise
    'X-Object-Sharing' is set, 'X-Object-Shared-By' is added when the
    permissions come from a different path than the object itself, and
    'X-Object-Allowed-To' is added when the requesting user is not the
    account owner.

    With TRANSLATE_UUIDS enabled the 'read' and 'write' lists of `perms` are
    replaced in place with their display-name translations.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return

    access_kinds = ('read', 'write')

    # replace uuid with displayname
    if TRANSLATE_UUIDS:
        token = getattr(request, 'token', None)
        for access in access_kinds:
            perms[access] = [replace_permissions_uuid(token, holder)
                             for holder in perms.get(access, [])]

    parts = []
    for access in access_kinds:
        holders = ','.join(perms.get(access, []))
        if holders:
            parts.append(access + '=' + holders)
    meta['X-Object-Sharing'] = '; '.join(parts)

    object_path = '/'.join((v_account, v_container, v_object))
    if object_path != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
386

    
387

    
388
def update_public_meta(public, meta):
    """Set 'X-Object-Public' in *meta* to '/public/' + *public*.

    Nothing is changed when *public* is falsy.
    """
    if public:
        meta['X-Object-Public'] = '/public/' + public
392

    
393

    
394
def validate_modification_preconditions(request, meta):
    """Check the modification time against the request's date preconditions.

    Raises NotModified when If-Modified-Since is set and the resource is not
    newer than it, and PreconditionFailed when If-Unmodified-Since is set and
    the resource is newer. A header for which parse_http_date_safe() returns
    None is ignored.
    """
    if 'modified' not in meta:
        return  # TODO: Always return?

    since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if since is not None:
        since = parse_http_date_safe(since)
        if since is not None and int(meta['modified']) <= since:
            raise NotModified('Resource has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        unmodified_since = parse_http_date_safe(unmodified_since)
        if (unmodified_since is not None and
                int(meta['modified']) > unmodified_since):
            raise PreconditionFailed('Resource has been modified')
411

    
412

    
413
def validate_matching_preconditions(request, meta):
    """Check the object's ETag against If-Match / If-None-Match.

    An empty checksum is treated as "resource does not exist". Raises
    PreconditionFailed when If-Match is not satisfied. When If-None-Match
    matches, raises NotModified for HEAD/GET requests and PreconditionFailed
    for every other method.
    """
    etag = meta['checksum'] or None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            accepted = [x.lower() for x in parse_etags(if_match)]
            if etag not in accepted:
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    # TODO: If this passes, must ignore If-Modified-Since header.
    if if_none_match is None or etag is None:
        return
    if if_none_match != '*':
        rejected = [x.lower() for x in parse_etags(if_none_match)]
        if etag not in rejected:
            return

    # TODO: Continue if an If-Modified-Since header is present.
    if request.method in ('HEAD', 'GET'):
        raise NotModified('Resource ETag matches')
    raise PreconditionFailed('Resource exists or ETag matches')
436

    
437

    
438
def split_container_object_string(s):
    """Split '/<container>/<object>' into a (container, object) tuple.

    The object part may itself contain slashes. Raises ValueError when the
    string does not start with '/', has no second '/', or has nothing after
    the second '/'.
    """
    if not s.startswith('/'):
        raise ValueError
    container, slash, name = s[1:].partition('/')
    if not slash or name == '':
        raise ValueError
    return container, name
446

    
447

    
448
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False, delimiter=None):
    """Copy or move an object.

    Metadata, permissions and the public flag for the destination are taken
    from the request headers. The source version (X-Source-Version) is only
    passed on for copies. Returns the version id reported by the backend.

    Backend errors are translated: NotAllowedError -> Forbidden,
    ItemNotExists/VersionNotExists -> ItemNotFound, ValueError -> BadRequest,
    QuotaError -> RequestEntityTooLarge.
    """
    # An 'ignore_content_type' query parameter drops the request's own type.
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del request.META['CONTENT_TYPE']
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')

    try:
        # Positional arguments shared by move_object() and copy_object().
        shared_args = (
            request.user_uniq, src_account, src_container, src_name,
            dest_account, dest_container, dest_name,
            content_type, 'pithos', meta, False, permissions)
        if move:
            version_id = request.backend.move_object(
                *(shared_args + (delimiter,)))
        else:
            version_id = request.backend.copy_object(
                *(shared_args + (src_version, delimiter)))
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (ItemNotExists, VersionNotExists):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError as e:
        raise RequestEntityTooLarge('Quota error: %s' % e)

    if public is None:
        return version_id

    try:
        request.backend.update_object_public(
            request.user_uniq, dest_account, dest_container, dest_name,
            public)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except ItemNotExists:
        raise ItemNotFound('Object does not exist')
    return version_id
482

    
483

    
484
def get_int_parameter(p):
    """Convert *p* to a non-negative integer.

    Returns None when *p* is None, is not a valid integer literal, or is
    negative.
    """
    if p is None:
        return None
    try:
        value = int(p)
    except ValueError:
        return None
    if value < 0:
        return None
    return value
493

    
494

    
495
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises LengthRequired when the header is missing or invalid.
    """
    raw_length = request.META.get('CONTENT_LENGTH')
    content_length = get_int_parameter(raw_length)
    if content_length is not None:
        return content_length
    raise LengthRequired('Missing or invalid Content-Length header')
500

    
501

    
502
def get_range(request, size):
    """Parse a Range header from the request.

    Returns None when the header is absent or should be ignored (not a
    'bytes=' range, or any malformed/inverted range spec). Otherwise returns
    a list of (offset, length) tuples computed against *size*; these are not
    clamped to the object's bounds and should be further checked.
    """
    header = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not header.startswith('bytes='):
        return None

    spec_re = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ret = []
    for spec in header[6:].split(','):
        m = spec_re.match(spec.strip())
        if not m:
            return None
        first = m.group('offset')
        last = m.group('upto')
        if first == '' and last == '':
            return None

        if first == '':
            # '-N': the last N bytes.
            suffix_length = int(last)
            ret.append((size - suffix_length, suffix_length))
        elif last == '':
            # 'N-': from offset N to the end.
            start = int(first)
            ret.append((start, size - start))
        else:
            # 'N-M': both ends inclusive.
            start = int(first)
            end = int(last)
            if start > end:
                return None
            ret.append((start, end - start + 1))

    return ret
538

    
539

    
540
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Returns None when the header is absent or should be ignored (malformed
    or inconsistent). Returns (None, None, None) for 'bytes */*'. Otherwise
    returns an (offset, length, total) tuple, where length is None for an
    open-ended range and total is None when given as '*'.
    """
    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None

    m = re.match(
        r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if not m:
        if header == 'bytes */*':
            return (None, None, None)
        return None

    offset = int(m.group('offset'))
    upto = int(m.group('upto')) if m.group('upto') != '' else None
    total = int(m.group('total')) if m.group('total') != '*' else None

    # Reject inverted ranges and ranges reaching beyond the declared total.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
579

    
580

    
581
def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Returns None when the header is absent and an empty dict when it is
    blank. Otherwise returns a dict with optional 'read' and 'write' lists of
    lower-cased permission holders; a list containing '*' collapses to
    ['*']. Holders present in both lists are kept only in 'write'.

    With TRANSLATE_UUIDS enabled, display names are replaced by uuids and
    both keys are always assigned before duplicates are removed.

    Raises BadRequest on error.
    """
    header = request.META.get('HTTP_X_OBJECT_SHARING')
    if header is None:
        return None

    # TODO: Document or remove '~' replacing.
    header = header.replace('~', '')

    ret = {}
    header = header.replace(' ', '')
    if header == '':
        return ret

    access_kinds = ('read', 'write')
    for perm in header.split(';'):
        for access in access_kinds:
            marker = access + '='
            if not perm.startswith(marker):
                continue
            holders = list(set(
                v.replace(' ', '').lower()
                for v in perm[len(marker):].split(',')))
            if '' in holders:
                holders.remove('')
            if '*' in holders:
                holders = ['*']
            if len(holders) == 0:
                raise BadRequest(
                    'Bad X-Object-Sharing header value: invalid length')
            ret[access] = holders
            break
        else:
            # Neither 'read=' nor 'write=' introduces this part.
            raise BadRequest(
                'Bad X-Object-Sharing header value: missing prefix')

    # replace displayname with uuid
    if TRANSLATE_UUIDS:
        token = getattr(request, 'token', None)
        try:
            for access in access_kinds:
                ret[access] = [replace_permissions_displayname(token, x)
                               for x in ret.get(access, [])]
        except ItemNotExists as e:
            raise BadRequest(
                'Bad X-Object-Sharing header value: unknown account: %s' % e)

    # Keep duplicates only in write list.
    writers = ret.get('write', [])
    dups = [x for x in ret.get('read', []) if x in writers and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del ret['read']

    return ret
646

    
647

    
648
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Returns None when the header is absent, True for 'true' and False for
    'false' or an empty value (spaces and case are ignored).

    Raises BadRequest on error.
    """
    value = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if value is None:
        return None

    value = value.replace(' ', '').lower()
    if value == 'true':
        return True
    if value in ('false', ''):
        return False
    raise BadRequest('Bad X-Object-Public header value')
664

    
665

    
666
def raw_input_socket(request):
    """Return the socket for reading the rest of the request.

    Under mod_python this is the request's `_req` attribute; otherwise it is
    the 'wsgi.input' stream from the environ. Raises NotImplemented (the API
    fault) when neither is available.
    """
    software = request.META.get('SERVER_SOFTWARE')
    if software and software.startswith('mod_python'):
        return request._req
    if 'wsgi.input' not in request.environ:
        raise NotImplemented('Unknown server software')
    return request.environ['wsgi.input']
675

    
676
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB


def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    Raises BadRequest when the declared length exceeds MAX_UPLOAD_SIZE, when
    a chunked upload reaches MAX_UPLOAD_SIZE, when a chunk size line is
    invalid, or when the input ends before the expected data was received.
    """
    sock = raw_input_socket(request)

    if length >= 0:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
        return

    # Chunked transfers. 'length' is negative here, so the number of bytes
    # received is tracked separately to make the size limit effective.
    received = 0

    # Small version (server does the dechunking).
    server_software = request.META.get('SERVER_SOFTWARE') or ''
    if (request.environ.get('mod_wsgi.input_chunked', None) or
            server_software.startswith('gunicorn')):
        while received < MAX_UPLOAD_SIZE:
            data = sock.read(blocksize)
            if not data:
                return
            received += len(data)
            yield data
        raise BadRequest('Maximum size is reached')

    # Long version (do the dechunking).
    data = ''
    while received < MAX_UPLOAD_SIZE:
        # Get chunk size.
        if hasattr(sock, 'readline'):
            chunk_length = sock.readline()
        else:
            chunk_length = ''
            while chunk_length[-1:] != '\n':
                char = sock.read(1)
                if not char:
                    # Input ended in the middle of the chunk size line.
                    raise BadRequest('Bad chunk size')
                chunk_length += char
        # Drop any chunk extension following the size.
        pos = chunk_length.find(';')
        if pos >= 0:
            chunk_length = chunk_length[:pos]
        try:
            # int() tolerates the surrounding whitespace/CRLF.
            chunk_length = int(chunk_length, 16)
        except Exception:
            raise BadRequest('Bad chunk size')
                             # TODO: Change to something more appropriate.
        if chunk_length < 0:
            raise BadRequest('Bad chunk size')
        # Check if done.
        if chunk_length == 0:
            if len(data) > 0:
                yield data
            return
        # Get the actual data.
        while chunk_length > 0:
            chunk = sock.read(min(chunk_length, blocksize))
            if not chunk:
                # Input ended before the announced chunk was complete.
                raise BadRequest()
            chunk_length -= len(chunk)
            received += len(chunk)
            data += chunk
            if len(data) >= blocksize:
                ret = data[:blocksize]
                data = data[blocksize:]
                yield ret
        sock.read(2)  # CRLF
    raise BadRequest('Maximum size is reached')
743

    
744

    
745
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Incoming data is buffered and stored in the backend one block at a
    time. The resulting UploadedFile carries the extra attributes `hashmap`
    (the values returned by the backend for each stored block, in order),
    `size` and `etag` (MD5 hex digest of the stored data).
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        """Store the first *length* buffered bytes as one block.

        Does nothing when fewer than *length* bytes are buffered.
        """
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        """Reset the digest, the buffer and the file object for a new upload."""
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(
            name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        """Buffer *raw_data* and store every complete block available.

        Returns None, so the data is not passed on to any later handler.
        """
        self.data += raw_data
        self.file.size += len(raw_data)
        block_size = self.request.backend.block_size
        # A chunk may span several blocks; flush all of them so that only a
        # partial block is ever left in the buffer.
        while block_size > 0 and len(self.data) >= block_size:
            self.put_data(block_size)
        return None

    def file_complete(self, file_size):
        """Store the remaining partial block and return the uploaded file."""
        remaining = len(self.data)
        if remaining > 0:
            self.put_data(remaining)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
779

    
780

    
781
class ObjectWrapper(object):
    """Iterator that returns the object's data piece by piece.

    Each entry of `ranges` is an (offset, length) pair; every iteration
    returns data taken from a single block. When more than one range is
    given, the data of each range is preceded by a multipart part header
    and a closing footer is returned after the last range.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        # Current position and the most recently fetched block.
        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        # -1 means that no part header has been returned yet.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of the current range.

        Raises StopIteration once the current range has no data left.
        """
        if self.length <= 0:
            raise StopIteration

        block_size = self.backend.block_size

        # Advance to the file that contains the current offset.
        file_size = self.sizes[self.file_index]
        while self.offset >= file_size:
            self.offset -= file_size
            self.file_index += 1
            file_size = self.sizes[self.file_index]

        # Locate the block, fetching it only if it is not the cached one.
        self.block_index, start = divmod(self.offset, block_size)
        hashmap = self.hashmaps[self.file_index]
        wanted = hashmap[self.block_index]
        if self.block_hash != wanted:
            self.block_hash = wanted
            try:
                self.block = self.backend.get_block(wanted)
            except ItemNotExists:
                raise ItemNotFound('Block does not exist')

        # The last block of a file only holds the file's remainder.
        usable = block_size
        remainder = self.sizes[self.file_index] % block_size
        if self.block_index == len(hashmap) - 1 and remainder:
            usable = remainder

        count = min(self.length, usable - start)
        piece = self.block[start:start + count]
        self.offset += count
        self.length -= count
        return piece

    def next(self):
        """Return the next piece of data, part header or footer."""
        total = len(self.ranges)
        if total == 1:
            return self.part_iterator()
        if self.range_index == total:
            raise StopIteration

        if self.range_index != -1:
            try:
                return self.part_iterator()
            except StopIteration:
                # Current range is exhausted; move on to the next one.
                pass

        self.range_index += 1
        if self.range_index < total:
            # Part header.
            self.offset, self.length = self.ranges[self.range_index]
            self.file_index = 0
            lines = []
            if self.range_index > 0:
                lines.append('')
            lines.extend([
                '--' + self.boundary,
                'Content-Range: bytes %d-%d/%d' % (
                    self.offset, self.offset + self.length - 1, self.size),
                'Content-Transfer-Encoding: binary',
                '',
                '',
            ])
        else:
            # Footer.
            lines = ['', '--' + self.boundary + '--', '']
        return '\r\n'.join(lines)
870

    
871

    
872
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Build the HttpResponse that replies with the object's data.

    Replies with status 200 and the whole object when no range is
    requested, or when an If-Range header is present and matches neither
    meta['modified'] nor meta['checksum']. Otherwise replies with status
    206: a single range sets Content-Length and Content-Range, several
    ranges produce a multipart/byteranges body.

    Raises RangeNotSatisfiable if a requested range exceeds the object.
    """

    size = sum(sizes)
    status = 200

    # Range handling.
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
    else:
        for offset, length in ranges:
            if (length <= 0 or length > size or
                    offset < 0 or offset >= size or
                    offset + length > size):
                raise RangeNotSatisfiable(
                    'Requested range exceeds object limits')
        status = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # A value that parses as a date is compared with the
                # modification time ...
                matches = parse_http_date(if_range) == meta['modified']
            except ValueError:
                # ... anything else is compared with the checksum.
                matches = if_range == meta['checksum']
            if not matches:
                ranges = [(0, size)]
                status = 200

    boundary = ''
    if status == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex

    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=status)
    put_object_headers(
        response, meta, restricted=public,
        token=getattr(request, 'token', None))

    if status != 206:
        return response

    if len(ranges) == 1:
        offset, length = ranges[0]
        # Update with the correct length.
        response['Content-Length'] = length
        response['Content-Range'] = 'bytes %d-%d/%d' % (
            offset, offset + length - 1, size)
    else:
        del response['Content-Length']
        response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
            boundary,)
    return response
922

    
923

    
924
def put_object_block(request, hashmap, data, offset):
    """Write the part of `data` that fits in the block holding `offset`.

    An existing hashmap entry is replaced by the hash of the updated
    block; otherwise a new block, zero-padded up to the in-block offset,
    is stored and its hash appended. Returns the number of bytes written.
    """

    backend = request.backend
    block_size = backend.block_size
    index, start = divmod(offset, block_size)
    written = min(len(data), block_size - start)
    chunk = data[:written]

    if index < len(hashmap):
        hashmap[index] = backend.update_block(hashmap[index], chunk, start)
    else:
        padding = '\x00' * start
        hashmap.append(backend.put_block(padding + chunk))
    return written  # Return amount of data written.
935

    
936

    
937
def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    `hashmap` lists the block hashes of an object that is `size` bytes
    long. Returns the lowercase hexadecimal MD5 digest of its data.
    """

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = backend.block_size
    last = len(hashmap) - 1
    tail = size % bs
    for bi, block_hash in enumerate(hashmap):
        data = backend.get_block(block_hash)  # Blocks come in padded.
        # Strip the padding of the last block. When size is a non-zero
        # multiple of the block size the last block is full, so it must
        # not be cut down to size % bs == 0 bytes.
        if bi == last and (tail or size == 0):
            data = data[:tail]
        md5.update(data)
    return md5.hexdigest().lower()
949

    
950

    
951
def simple_list_response(request, l):
    """Serialize the list `l` according to request.serialization.

    'text' gives one item per line, 'xml' renders the 'items.xml'
    template and 'json' dumps the list; any other value returns None.
    """
    serialization = request.serialization
    if serialization == 'json':
        return json.dumps(l)
    if serialization == 'xml':
        return render_to_string('items.xml', {'items': l})
    if serialization == 'text':
        return '\n'.join(l) + '\n'
    return None
958

    
959

    
960
from pithos.backends.util import PithosBackendPool
961
# Passed to the backend pool as its `size`.
POOL_SIZE = 5

# The RADOS pool names are only passed on when RADOS storage is enabled.
BLOCK_PARAMS = {
    'mappool': RADOS_POOL_MAPS if RADOS_STORAGE else None,
    'blockpool': RADOS_POOL_BLOCKS if RADOS_STORAGE else None,
}


# Module-level pool of backends, configured from the settings.
_pithos_backend_pool = PithosBackendPool(
    size=POOL_SIZE,
    db_module=BACKEND_DB_MODULE,
    db_connection=BACKEND_DB_CONNECTION,
    block_module=BACKEND_BLOCK_MODULE,
    block_path=BACKEND_BLOCK_PATH,
    block_umask=BACKEND_BLOCK_UMASK,
    queue_module=BACKEND_QUEUE_MODULE,
    queue_hosts=BACKEND_QUEUE_HOSTS,
    queue_exchange=BACKEND_QUEUE_EXCHANGE,
    quotaholder_enabled=USE_QUOTAHOLDER,
    quotaholder_url=QUOTAHOLDER_URL,
    quotaholder_token=QUOTAHOLDER_TOKEN,
    quotaholder_client_poolsize=QUOTAHOLDER_POOLSIZE,
    free_versioning=BACKEND_FREE_VERSIONING,
    block_params=BLOCK_PARAMS,
    public_url_min_length=PUBLIC_URL_MIN_LENGTH,
    public_url_alphabet=PUBLIC_URL_ALPHABET,
)
990

    
991
def get_backend():
    """Get a backend from the pool, with default policy and messages reset.

    The default quota and versioning policy are set from the configured
    BACKEND_QUOTA and BACKEND_VERSIONING and the message list is emptied.
    """
    backend = _pithos_backend_pool.pool_get()
    policy = backend.default_policy
    policy['quota'] = BACKEND_QUOTA
    policy['versioning'] = BACKEND_VERSIONING
    backend.messages = []
    return backend
997

    
998

    
999
def update_request_headers(request):
    """Validate the HTTP_* entries of request.META and URL-decode them.

    Raises BadRequest if a header name or value is not ASCII. An entry
    whose name or value contains '%' is replaced by its unquoted form.
    """
    # Work on a snapshot, since request.META is modified below.
    http_headers = dict(
        (name, value) for name, value in request.META.iteritems()
        if name.startswith('HTTP_'))

    for name, value in http_headers.iteritems():
        try:
            name.decode('ascii')
            value.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')

        # Handle URL-encoded keys and values.
        if '%' not in name and '%' not in value:
            continue
        del request.META[name]
        request.META[unquote(name)] = smart_unicode(
            unquote(value), strings_only=True)
1013

    
1014

    
1015
def update_response_headers(request, response):
    """Set the content headers of `response` and URL-encode its headers.

    Content-Type follows request.serialization and falls back to plain
    text when empty. Content-Length is added if missing, except for
    multipart/byteranges responses. The X-Account-*, X-Container-*,
    X-Object-* and Content-* headers are then re-added URL-encoded.
    """
    serialization = request.serialization
    if serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if not response.has_header('Content-Length'):
        multipart = (
            response.has_header('Content-Type') and
            response['Content-Type'].startswith('multipart/byteranges'))
        if not multipart:
            response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    prefixes = ('X-Account-', 'X-Container-', 'X-Object-', 'Content-')
    for name, value in response.items():
        if name.startswith(prefixes):
            del response[name]
            response[quote(name)] = quote(value, safe='/=,:@; ')
1035

    
1036

    
1037
def render_fault(request, fault):
    """Render `fault` as a plain-text HttpResponse with the fault's code.

    The body is the fault's message, followed by its details if any. For
    an InternalServerError in DEBUG mode the details are replaced by the
    traceback of the exception currently being handled.
    """
    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # format_exc() formats the exception being handled; its only
        # parameter is a traceback depth limit, so the fault is not passed.
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
1048

    
1049

    
1050
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    The result is always 'text' unless `format_allowed` is True. In that
    case the 'format' query parameter ('json' or 'xml') is checked first,
    then the media types of the Accept header in the order listed;
    'text' is returned when neither selects a format.
    """

    if not format_allowed:
        return 'text'

    requested = request.GET.get('format')
    for name in ('json', 'xml'):
        if requested == name:
            return name

    accept_header = request.META.get('HTTP_ACCEPT', '')
    for entry in accept_header.split(','):
        # Drop any parameters following the media type.
        media_type = entry.strip().partition(';')[0]
        if media_type == 'application/json':
            return 'json'
        if media_type in ('application/xml', 'text/xml'):
            return 'xml'

    return 'text'
1073

    
1074
def get_pithos_usage(usage):
    """Return the first entry of `usage` named 'pithos+.diskspace'.

    Returns None when no entry has that name.
    """
    matching = (entry for entry in usage
                if entry.get('name') == 'pithos+.diskspace')
    return next(matching, None)
1078

    
1079
def api_method(http_method=None, format_allowed=False, user_required=True,
        request_usage=False):
    """Decorator function for views that implement an API method.

    The wrapped view is called with request.serialization and
    request.backend set, and its response goes through
    update_response_headers. A Fault, or any other exception, is rendered
    with render_fault instead of propagating. The backend is closed once
    the request has been handled.

    http_method -- if set, requests with another method get a BadRequest.
    format_allowed -- passed to request_serialization.
    user_required -- if True, the user is looked up with get_user and
        Unauthorized is raised when none is found.
    request_usage -- passed to get_user.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                if user_required:
                    token = None
                    if request.method in ('HEAD', 'GET') and COOKIE_NAME in request.COOKIES:
                        # The token follows the first '|' of the cookie.
                        cookie_value = unquote(
                            request.COOKIES.get(COOKIE_NAME, ''))
                        account, sep, token = cookie_value.partition('|')
                    get_user(request,
                             AUTHENTICATION_URL,
                             AUTHENTICATION_USERS,
                             token,
                             request_usage)
                    if getattr(request, 'user', None) is None:
                        raise Unauthorized('Access denied')
                    assert getattr(request, 'user_uniq', None) is not None
                    request.user_usage = get_pithos_usage(request.user.get('usage', []))
                    request.token = request.GET.get('X-Auth-Token', request.META.get('HTTP_X_AUTH_TOKEN', token))

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(
                    request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                if fault.code >= 500:
                    logger.exception("API Fault")
                return render_fault(request, fault)
            except BaseException as e:
                # Let logging do the formatting, so that an exception that
                # cannot be converted to a string does not escape from
                # this handler.
                logger.exception('Unexpected error: %s', e)
                fault = InternalServerError('Unexpected error')
                return render_fault(request, fault)
            finally:
                # The backend is only set if the request got that far.
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator