Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 876d7486

History | View | Annotate | Download (45.9 kB)

1
# Copyright 2011-2013 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from datetime import datetime
36
from urllib import quote, unquote, urlencode
37

    
38
from django.http import (HttpResponse, Http404, HttpResponseRedirect,
39
                         HttpResponseNotAllowed)
40
from django.template.loader import render_to_string
41
from django.utils import simplejson as json
42
from django.utils.http import http_date, parse_etags
43
from django.utils.encoding import smart_unicode, smart_str
44
from django.core.files.uploadhandler import FileUploadHandler
45
from django.core.files.uploadedfile import UploadedFile
46
from django.core.urlresolvers import reverse
47
from django.core.exceptions import PermissionDenied
48

    
49
from snf_django.lib.api.parsedate import parse_http_date_safe, parse_http_date
50
from snf_django.lib import api
51
from snf_django.lib.api import faults, utils
52

    
53
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
54
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
55
                                 BACKEND_BLOCK_UMASK,
56
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
57
                                 BACKEND_QUEUE_EXCHANGE,
58
                                 ASTAKOSCLIENT_POOLSIZE,
59
                                 SERVICE_TOKEN,
60
                                 ASTAKOS_AUTH_URL,
61
                                 BACKEND_ACCOUNT_QUOTA,
62
                                 BACKEND_CONTAINER_QUOTA,
63
                                 BACKEND_VERSIONING, BACKEND_FREE_VERSIONING,
64
                                 BACKEND_POOL_ENABLED, BACKEND_POOL_SIZE,
65
                                 BACKEND_BLOCK_SIZE, BACKEND_HASH_ALGORITHM,
66
                                 BACKEND_ARCHIPELAGO_CONF,
67
                                 BACKEND_XSEG_POOL_SIZE,
68
                                 BACKEND_MAP_CHECK_INTERVAL,
69
                                 RADOS_STORAGE, RADOS_POOL_BLOCKS,
70
                                 RADOS_POOL_MAPS, TRANSLATE_UUIDS,
71
                                 PUBLIC_URL_SECURITY, PUBLIC_URL_ALPHABET,
72
                                 BASE_HOST, UPDATE_MD5, VIEW_PREFIX,
73
                                 OAUTH2_CLIENT_CREDENTIALS, UNSAFE_DOMAIN)
74

    
75
from pithos.api.resources import resources
76
from pithos.backends import connect_backend
77
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
78
                                  VersionNotExists, IllegalOperationError)
79

    
80
from synnefo.lib import join_urls
81
from synnefo.util import text
82

    
83
from astakosclient import AstakosClient
84
from astakosclient.errors import NoUserName, NoUUID, AstakosClientException
85

    
86
import logging
87
import re
88
import hashlib
89
import uuid
90
import decimal
91

    
92
logger = logging.getLogger(__name__)
93

    
94

    
95
def json_encode_decimal(obj):
96
    if isinstance(obj, decimal.Decimal):
97
        return str(obj)
98
    raise TypeError(repr(obj) + " is not JSON serializable")
99

    
100

    
101
def rename_meta_key(d, old, new):
102
    if old not in d:
103
        return
104
    d[new] = d[old]
105
    del(d[old])
106

    
107

    
108
def printable_header_dict(d):
109
    """Format a meta dictionary for printing out json/xml.
110

111
    Convert all keys to lower case and replace dashes with underscores.
112
    Format 'last_modified' timestamp.
113
    """
114

    
115
    timestamps = ('last_modified', 'x_container_until_timestamp',
116
                  'x_acount_until_timestamp')
117
    for timestamp in timestamps:
118
        if timestamp in d and d[timestamp]:
119
            d[timestamp] = utils.isoformat(
120
                datetime.fromtimestamp(d[timestamp]))
121
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
122

    
123

    
124
def format_header_key(k):
125
    """Convert underscores to dashes and capitalize intra-dash strings."""
126
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
127

    
128

    
129
def get_header_prefix(request, prefix):
130
    """Get all prefix-* request headers in a dict.
131
       Reformat keys with format_header_key()."""
132

    
133
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
134
    # TODO: Document or remove '~' replacing.
135
    return dict([(format_header_key(k[5:]), v.replace('~', ''))
136
                for k, v in request.META.iteritems()
137
                if k.startswith(prefix) and len(k) > len(prefix)])
138

    
139

    
140
def check_meta_headers(meta):
141
    if len(meta) > 90:
142
        raise faults.BadRequest('Too many headers.')
143
    for k, v in meta.iteritems():
144
        if len(k) > 128:
145
            raise faults.BadRequest('Header name too large.')
146
        if len(v) > 256:
147
            raise faults.BadRequest('Header value too large.')
148

    
149

    
150
def get_account_headers(request):
151
    meta = get_header_prefix(request, 'X-Account-Meta-')
152
    check_meta_headers(meta)
153
    groups = {}
154
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
155
        n = k[16:].lower()
156
        if '-' in n or '_' in n:
157
            raise faults.BadRequest('Bad characters in group name')
158
        groups[n] = v.replace(' ', '').split(',')
159
        while '' in groups[n]:
160
            groups[n].remove('')
161
    return meta, groups
162

    
163

    
164
def put_account_headers(response, meta, groups, policy):
165
    if 'count' in meta:
166
        response['X-Account-Container-Count'] = meta['count']
167
    if 'bytes' in meta:
168
        response['X-Account-Bytes-Used'] = meta['bytes']
169
    response['Last-Modified'] = http_date(int(meta['modified']))
170
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
171
        response[smart_str(
172
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
173
    if 'until_timestamp' in meta:
174
        response['X-Account-Until-Timestamp'] = http_date(
175
            int(meta['until_timestamp']))
176
    for k, v in groups.iteritems():
177
        k = smart_str(k, strings_only=True)
178
        k = format_header_key('X-Account-Group-' + k)
179
        v = smart_str(','.join(v), strings_only=True)
180
        response[k] = v
181
    for k, v in policy.iteritems():
182
        response[smart_str(format_header_key('X-Account-Policy-' + k),
183
                 strings_only=True)] = smart_str(v, strings_only=True)
184

    
185

    
186
def get_container_headers(request):
187
    meta = get_header_prefix(request, 'X-Container-Meta-')
188
    check_meta_headers(meta)
189
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in
190
                  get_header_prefix(request,
191
                                    'X-Container-Policy-').iteritems()])
192
    return meta, policy
193

    
194

    
195
def put_container_headers(request, response, meta, policy):
196
    if 'count' in meta:
197
        response['X-Container-Object-Count'] = meta['count']
198
    if 'bytes' in meta:
199
        response['X-Container-Bytes-Used'] = meta['bytes']
200
    response['Last-Modified'] = http_date(int(meta['modified']))
201
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
202
        response[smart_str(
203
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
204
    l = [smart_str(x, strings_only=True) for x in meta['object_meta']
205
         if x.startswith('X-Object-Meta-')]
206
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
207
    response['X-Container-Block-Size'] = request.backend.block_size
208
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
209
    if 'until_timestamp' in meta:
210
        response['X-Container-Until-Timestamp'] = http_date(
211
            int(meta['until_timestamp']))
212
    for k, v in policy.iteritems():
213
        response[smart_str(format_header_key('X-Container-Policy-' + k),
214
                           strings_only=True)] = smart_str(v,
215
                                                           strings_only=True)
216

    
217

    
218
def get_object_headers(request):
219
    content_type = request.META.get('CONTENT_TYPE', None)
220
    meta = get_header_prefix(request, 'X-Object-Meta-')
221
    check_meta_headers(meta)
222
    if request.META.get('HTTP_CONTENT_ENCODING'):
223
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
224
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
225
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
226
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
227
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
228
    return content_type, meta, get_sharing(request), get_public(request)
229

    
230

    
231
def put_object_headers(response, meta, restricted=False, token=None):
232
    response['ETag'] = meta['hash'] if not UPDATE_MD5 else meta['checksum']
233
    response['Content-Length'] = meta['bytes']
234
    response.override_serialization = True
235
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
236
    response['Last-Modified'] = http_date(int(meta['modified']))
237
    response['Map-Exists'] = meta['available']
238
    response['Map-Checked-At'] = (
239
        http_date(int(meta['map_check_timestamp'])) if
240
        meta['map_check_timestamp'] is not None else '')
241
    if not restricted:
242
        response['X-Object-Hash'] = meta['hash']
243
        response['X-Object-UUID'] = meta['uuid']
244
        if TRANSLATE_UUIDS:
245
            meta['modified_by'] = \
246
                retrieve_displayname(token, meta['modified_by'])
247
        response['X-Object-Modified-By'] = smart_str(
248
            meta['modified_by'], strings_only=True)
249
        response['X-Object-Version'] = meta['version']
250
        response['X-Object-Version-Timestamp'] = http_date(
251
            int(meta['version_timestamp']))
252
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
253
            response[smart_str(
254
                k, strings_only=True)] = smart_str(meta[k], strings_only=True)
255
        for k in (
256
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
257
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
258
                'X-Object-Public'):
259
            if k in meta:
260
                response[k] = smart_str(meta[k], strings_only=True)
261
    else:
262
        for k in ('Content-Encoding', 'Content-Disposition'):
263
            if k in meta:
264
                response[k] = smart_str(meta[k], strings_only=True)
265

    
266

    
267
def update_manifest_meta(request, v_account, meta):
268
    """Update metadata if the object has an X-Object-Manifest."""
269

    
270
    if 'X-Object-Manifest' in meta:
271
        etag = ''
272
        bytes = 0
273
        try:
274
            src_container, src_name = split_container_object_string(
275
                '/' + meta['X-Object-Manifest'])
276
            objects = request.backend.list_objects(
277
                request.user_uniq, v_account,
278
                src_container, prefix=src_name, virtual=False)
279
            for x in objects:
280
                src_meta = request.backend.get_object_meta(
281
                    request.user_uniq, v_account, src_container, x[0],
282
                    'pithos', x[1])
283
                etag += (src_meta['hash'] if not UPDATE_MD5 else
284
                         src_meta['checksum'])
285
                bytes += src_meta['bytes']
286
        except:
287
            # Ignore errors.
288
            return
289
        meta['bytes'] = bytes
290
        md5 = hashlib.md5()
291
        md5.update(etag)
292
        meta['checksum'] = md5.hexdigest().lower()
293

    
294

    
295
def is_uuid(str):
296
    if str is None:
297
        return False
298
    try:
299
        uuid.UUID(str)
300
    except ValueError:
301
        return False
302
    else:
303
        return True
304

    
305

    
306
##########################
307
# USER CATALOG utilities #
308
##########################
309

    
310
def retrieve_displayname(token, uuid, fail_silently=True):
311
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
312
                            retry=2, use_pool=True,
313
                            logger=logger)
314
    try:
315
        displayname = astakos.get_username(uuid)
316
    except NoUserName:
317
        if not fail_silently:
318
            raise ItemNotExists(uuid)
319
        else:
320
            # just return the uuid
321
            return uuid
322
    return displayname
323

    
324

    
325
def retrieve_displaynames(token, uuids, return_dict=False, fail_silently=True):
326
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
327
                            retry=2, use_pool=True,
328
                            logger=logger)
329
    catalog = astakos.get_usernames(uuids) or {}
330
    missing = list(set(uuids) - set(catalog))
331
    if missing and not fail_silently:
332
        raise ItemNotExists('Unknown displaynames: %s' % ', '.join(missing))
333
    return catalog if return_dict else [catalog.get(i) for i in uuids]
334

    
335

    
336
def retrieve_uuid(token, displayname):
337
    if is_uuid(displayname):
338
        return displayname
339

    
340
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
341
                            retry=2, use_pool=True,
342
                            logger=logger)
343
    try:
344
        uuid = astakos.get_uuid(displayname)
345
    except NoUUID:
346
        raise ItemNotExists(displayname)
347
    return uuid
348

    
349

    
350
def retrieve_uuids(token, displaynames, return_dict=False, fail_silently=True):
351
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
352
                            retry=2, use_pool=True,
353
                            logger=logger)
354
    catalog = astakos.get_uuids(displaynames) or {}
355
    missing = list(set(displaynames) - set(catalog))
356
    if missing and not fail_silently:
357
        raise ItemNotExists('Unknown uuids: %s' % ', '.join(missing))
358
    return catalog if return_dict else [catalog.get(i) for i in displaynames]
359

    
360

    
361
def replace_permissions_displayname(token, holder):
362
    if holder == '*':
363
        return holder
364
    try:
365
        # check first for a group permission
366
        account, group = holder.split(':', 1)
367
    except ValueError:
368
        return retrieve_uuid(token, holder)
369
    else:
370
        return ':'.join([retrieve_uuid(token, account), group])
371

    
372

    
373
def replace_permissions_uuid(token, holder):
374
    if holder == '*':
375
        return holder
376
    try:
377
        # check first for a group permission
378
        account, group = holder.split(':', 1)
379
    except ValueError:
380
        return retrieve_displayname(token, holder)
381
    else:
382
        return ':'.join([retrieve_displayname(token, account), group])
383

    
384

    
385
def update_sharing_meta(request, permissions, v_account,
386
                        v_container, v_object, meta):
387
    if permissions is None:
388
        return
389
    allowed, perm_path, perms = permissions
390
    if len(perms) == 0:
391
        return
392

    
393
    # replace uuid with displayname
394
    if TRANSLATE_UUIDS:
395
        perms['read'] = [replace_permissions_uuid(
396
            getattr(request, 'token', None), x)
397
            for x in perms.get('read', [])]
398
        perms['write'] = [replace_permissions_uuid(
399
            getattr(request, 'token', None), x)
400
            for x in perms.get('write', [])]
401

    
402
    ret = []
403

    
404
    r = ','.join(perms.get('read', []))
405
    if r:
406
        ret.append('read=' + r)
407
    w = ','.join(perms.get('write', []))
408
    if w:
409
        ret.append('write=' + w)
410
    meta['X-Object-Sharing'] = '; '.join(ret)
411
    if '/'.join((v_account, v_container, v_object)) != perm_path:
412
        meta['X-Object-Shared-By'] = perm_path
413
    if request.user_uniq != v_account:
414
        meta['X-Object-Allowed-To'] = allowed
415

    
416

    
417
def update_public_meta(public, meta):
418
    if not public:
419
        return
420
    meta['X-Object-Public'] = join_urls(
421
        BASE_HOST, reverse('pithos.api.public.public_demux', args=(public,)))
422

    
423

    
424
def validate_modification_preconditions(request, meta):
425
    """Check the modified timestamp conforms with the preconditions set."""
426

    
427
    if 'modified' not in meta:
428
        return  # TODO: Always return?
429

    
430
    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
431
    if if_modified_since is not None:
432
        if_modified_since = parse_http_date_safe(if_modified_since)
433
    if (if_modified_since is not None
434
            and int(meta['modified']) <= if_modified_since):
435
        raise faults.NotModified('Resource has not been modified')
436

    
437
    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
438
    if if_unmodified_since is not None:
439
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
440
    if (if_unmodified_since is not None
441
            and int(meta['modified']) > if_unmodified_since):
442
        raise faults.PreconditionFailed('Resource has been modified')
443

    
444

    
445
def validate_matching_preconditions(request, meta):
446
    """Check that the ETag conforms with the preconditions set."""
447

    
448
    etag = meta['hash'] if not UPDATE_MD5 else meta['checksum']
449
    if not etag:
450
        etag = None
451

    
452
    if_match = request.META.get('HTTP_IF_MATCH')
453
    if if_match is not None:
454
        if etag is None:
455
            raise faults.PreconditionFailed('Resource does not exist')
456
        if (if_match != '*'
457
                and etag not in [x.lower() for x in parse_etags(if_match)]):
458
            raise faults.PreconditionFailed('Resource ETag does not match')
459

    
460
    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
461
    if if_none_match is not None:
462
        # TODO: If this passes, must ignore If-Modified-Since header.
463
        if etag is not None:
464
            if (if_none_match == '*' or etag in [x.lower() for x in
465
                                                 parse_etags(if_none_match)]):
466
                # TODO: Continue if an If-Modified-Since header is present.
467
                if request.method in ('HEAD', 'GET'):
468
                    raise faults.NotModified('Resource ETag matches')
469
                raise faults.PreconditionFailed(
470
                    'Resource exists or ETag matches')
471

    
472

    
473
def split_container_object_string(s):
474
    if not len(s) > 0 or s[0] != '/':
475
        raise ValueError
476
    s = s[1:]
477
    pos = s.find('/')
478
    if pos == -1 or pos == len(s) - 1:
479
        raise ValueError
480
    return s[:pos], s[(pos + 1):]
481

    
482

    
483
def copy_or_move_object(request, src_account, src_container, src_name,
484
                        dest_account, dest_container, dest_name,
485
                        move=False, delimiter=None):
486
    """Copy or move an object."""
487

    
488
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
489
        del(request.META['CONTENT_TYPE'])
490
    content_type, meta, permissions, public = get_object_headers(request)
491
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
492
    try:
493
        if move:
494
            version_id = request.backend.move_object(
495
                request.user_uniq, src_account, src_container, src_name,
496
                dest_account, dest_container, dest_name,
497
                content_type, 'pithos', meta, False, permissions, delimiter)
498
        else:
499
            version_id = request.backend.copy_object(
500
                request.user_uniq, src_account, src_container, src_name,
501
                dest_account, dest_container, dest_name,
502
                content_type, 'pithos', meta, False, permissions,
503
                src_version, delimiter)
504
    except NotAllowedError:
505
        raise faults.Forbidden('Not allowed')
506
    except (ItemNotExists, VersionNotExists):
507
        raise faults.ItemNotFound('Container or object does not exist')
508
    except ValueError:
509
        raise faults.BadRequest('Invalid sharing header')
510
    except QuotaError, e:
511
        raise faults.RequestEntityTooLarge('Quota error: %s' % e)
512
    if public is not None:
513
        try:
514
            request.backend.update_object_public(
515
                request.user_uniq, dest_account,
516
                dest_container, dest_name, public)
517
        except NotAllowedError:
518
            raise faults.Forbidden('Not allowed')
519
        except ItemNotExists:
520
            raise faults.ItemNotFound('Object does not exist')
521
    return version_id
522

    
523

    
524
def get_int_parameter(p):
525
    if p is not None:
526
        try:
527
            p = int(p)
528
        except ValueError:
529
            return None
530
        if p < 0:
531
            return None
532
    return p
533

    
534

    
535
def get_content_length(request):
536
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
537
    if content_length is None:
538
        raise faults.LengthRequired('Missing or invalid Content-Length header')
539
    return content_length
540

    
541

    
542
def get_range(request, size):
543
    """Parse a Range header from the request.
544

545
    Either returns None, when the header is not existent or should be ignored,
546
    or a list of (offset, length) tuples - should be further checked.
547
    """
548

    
549
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
550
    if not ranges.startswith('bytes='):
551
        return None
552

    
553
    ret = []
554
    for r in (x.strip() for x in ranges[6:].split(',')):
555
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
556
        m = p.match(r)
557
        if not m:
558
            return None
559
        offset = m.group('offset')
560
        upto = m.group('upto')
561
        if offset == '' and upto == '':
562
            return None
563

    
564
        if offset != '':
565
            offset = int(offset)
566
            if upto != '':
567
                upto = int(upto)
568
                if offset > upto:
569
                    return None
570
                ret.append((offset, upto - offset + 1))
571
            else:
572
                ret.append((offset, size - offset))
573
        else:
574
            length = int(upto)
575
            ret.append((size - length, length))
576

    
577
    return ret
578

    
579

    
580
def get_content_range(request):
581
    """Parse a Content-Range header from the request.
582

583
    Either returns None, when the header is not existent or should be ignored,
584
    or an (offset, length, total) tuple - check as length, total may be None.
585
    Returns (None, None, None) if the provided range is '*/*'.
586
    """
587

    
588
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
589
    if not ranges:
590
        return None
591

    
592
    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
593
    m = p.match(ranges)
594
    if not m:
595
        if ranges == 'bytes */*':
596
            return (None, None, None)
597
        return None
598
    offset = int(m.group('offset'))
599
    upto = m.group('upto')
600
    total = m.group('total')
601
    if upto != '':
602
        upto = int(upto)
603
    else:
604
        upto = None
605
    if total != '*':
606
        total = int(total)
607
    else:
608
        total = None
609
    if (upto is not None and offset > upto) or \
610
        (total is not None and offset >= total) or \
611
            (total is not None and upto is not None and upto >= total):
612
        return None
613

    
614
    if upto is None:
615
        length = None
616
    else:
617
        length = upto - offset + 1
618
    return (offset, length, total)
619

    
620

    
621
def get_sharing(request):
622
    """Parse an X-Object-Sharing header from the request.
623

624
    Raises BadRequest on error.
625
    """
626

    
627
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
628
    if permissions is None:
629
        return None
630

    
631
    # TODO: Document or remove '~' replacing.
632
    permissions = permissions.replace('~', '')
633

    
634
    ret = {}
635
    permissions = permissions.replace(' ', '')
636
    if permissions == '':
637
        return ret
638
    for perm in (x for x in permissions.split(';')):
639
        if perm.startswith('read='):
640
            ret['read'] = list(set(
641
                [v.replace(' ', '').lower() for v in perm[5:].split(',')]))
642
            if '' in ret['read']:
643
                ret['read'].remove('')
644
            if '*' in ret['read']:
645
                ret['read'] = ['*']
646
            if len(ret['read']) == 0:
647
                raise faults.BadRequest(
648
                    'Bad X-Object-Sharing header value: invalid length')
649
        elif perm.startswith('write='):
650
            ret['write'] = list(set(
651
                [v.replace(' ', '').lower() for v in perm[6:].split(',')]))
652
            if '' in ret['write']:
653
                ret['write'].remove('')
654
            if '*' in ret['write']:
655
                ret['write'] = ['*']
656
            if len(ret['write']) == 0:
657
                raise faults.BadRequest(
658
                    'Bad X-Object-Sharing header value: invalid length')
659
        else:
660
            raise faults.BadRequest(
661
                'Bad X-Object-Sharing header value: missing prefix')
662

    
663
    # replace displayname with uuid
664
    if TRANSLATE_UUIDS:
665
        try:
666
            ret['read'] = [replace_permissions_displayname(
667
                getattr(request, 'token', None), x)
668
                for x in ret.get('read', [])]
669
            ret['write'] = [replace_permissions_displayname(
670
                getattr(request, 'token', None), x)
671
                for x in ret.get('write', [])]
672
        except ItemNotExists, e:
673
            raise faults.BadRequest(
674
                'Bad X-Object-Sharing header value: unknown account: %s' % e)
675

    
676
    # Keep duplicates only in write list.
677
    dups = [x for x in ret.get(
678
        'read', []) if x in ret.get('write', []) and x != '*']
679
    if dups:
680
        for x in dups:
681
            ret['read'].remove(x)
682
        if len(ret['read']) == 0:
683
            del(ret['read'])
684

    
685
    return ret
686

    
687

    
688
def get_public(request):
689
    """Parse an X-Object-Public header from the request.
690

691
    Raises BadRequest on error.
692
    """
693

    
694
    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
695
    if public is None:
696
        return None
697

    
698
    public = public.replace(' ', '').lower()
699
    if public == 'true':
700
        return True
701
    elif public == 'false' or public == '':
702
        return False
703
    raise faults.BadRequest('Bad X-Object-Public header value')
704

    
705

    
706
def raw_input_socket(request):
707
    """Return the socket for reading the rest of the request."""
708

    
709
    server_software = request.META.get('SERVER_SOFTWARE')
710
    if server_software and server_software.startswith('mod_python'):
711
        return request._req
712
    if 'wsgi.input' in request.environ:
713
        return request.environ['wsgi.input']
714
    raise NotImplemented('Unknown server software')
715

    
716
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB
717

    
718

    
719
def socket_read_iterator(request, length=0, blocksize=4096):
720
    """Return maximum of blocksize data read from the socket in each iteration
721

722
    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
723
    The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
724
    """
725

    
726
    sock = raw_input_socket(request)
727
    if length < 0:  # Chunked transfers
728
        # Small version (server does the dechunking).
729
        if (request.environ.get('mod_wsgi.input_chunked', None)
730
                or request.META['SERVER_SOFTWARE'].startswith('gunicorn')):
731
            while length < MAX_UPLOAD_SIZE:
732
                data = sock.read(blocksize)
733
                if data == '':
734
                    return
735
                yield data
736
            raise faults.BadRequest('Maximum size is reached')
737

    
738
        # Long version (do the dechunking).
739
        data = ''
740
        while length < MAX_UPLOAD_SIZE:
741
            # Get chunk size.
742
            if hasattr(sock, 'readline'):
743
                chunk_length = sock.readline()
744
            else:
745
                chunk_length = ''
746
                while chunk_length[-1:] != '\n':
747
                    chunk_length += sock.read(1)
748
                chunk_length.strip()
749
            pos = chunk_length.find(';')
750
            if pos >= 0:
751
                chunk_length = chunk_length[:pos]
752
            try:
753
                chunk_length = int(chunk_length, 16)
754
            except Exception:
755
                raise faults.BadRequest('Bad chunk size')
756
                                 # TODO: Change to something more appropriate.
757
            # Check if done.
758
            if chunk_length == 0:
759
                if len(data) > 0:
760
                    yield data
761
                return
762
            # Get the actual data.
763
            while chunk_length > 0:
764
                chunk = sock.read(min(chunk_length, blocksize))
765
                chunk_length -= len(chunk)
766
                if length > 0:
767
                    length += len(chunk)
768
                data += chunk
769
                if len(data) >= blocksize:
770
                    ret = data[:blocksize]
771
                    data = data[blocksize:]
772
                    yield ret
773
            sock.read(2)  # CRLF
774
        raise faults.BadRequest('Maximum size is reached')
775
    else:
776
        if length > MAX_UPLOAD_SIZE:
777
            raise faults.BadRequest('Maximum size is reached')
778
        while length > 0:
779
            data = sock.read(min(length, blocksize))
780
            if not data:
781
                raise faults.BadRequest()
782
            length -= len(data)
783
            yield data
784

    
785

    
786
class SaveToBackendHandler(FileUploadHandler):
787
    """Handle a file from an HTML form the django way."""
788

    
789
    def __init__(self, request=None):
790
        super(SaveToBackendHandler, self).__init__(request)
791
        self.backend = request.backend
792

    
793
    def put_data(self, length):
794
        if len(self.data) >= length:
795
            block = self.data[:length]
796
            self.file.hashmap.append(self.backend.put_block(block))
797
            self.checksum_compute.update(block)
798
            self.data = self.data[length:]
799

    
800
    def new_file(self, field_name, file_name, content_type,
801
                 content_length, charset=None):
802
        self.checksum_compute = NoChecksum() if not UPDATE_MD5 else Checksum()
803
        self.data = ''
804
        self.file = UploadedFile(
805
            name=file_name, content_type=content_type, charset=charset)
806
        self.file.size = 0
807
        self.file.hashmap = []
808

    
809
    def receive_data_chunk(self, raw_data, start):
810
        self.data += raw_data
811
        self.file.size += len(raw_data)
812
        self.put_data(self.request.backend.block_size)
813
        return None
814

    
815
    def file_complete(self, file_size):
816
        l = len(self.data)
817
        if l > 0:
818
            self.put_data(l)
819
        self.file.etag = self.checksum_compute.hexdigest()
820
        return self.file
821

    
822

    
823
class ObjectWrapper(object):
824
    """Return the object's data block-per-block in each iteration.
825

826
    Read from the object using the offset and length provided
827
    in each entry of the range list.
828
    """
829

    
830
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
831
        self.backend = backend
832
        self.ranges = ranges
833
        self.sizes = sizes
834
        self.hashmaps = hashmaps
835
        self.boundary = boundary
836
        self.size = sum(self.sizes)
837

    
838
        self.file_index = 0
839
        self.block_index = 0
840
        self.block_hash = -1
841
        self.block = ''
842

    
843
        self.range_index = -1
844
        self.offset, self.length = self.ranges[0]
845

    
846
    def __iter__(self):
847
        return self
848

    
849
    def part_iterator(self):
850
        if self.length > 0:
851
            # Get the file for the current offset.
852
            file_size = self.sizes[self.file_index]
853
            while self.offset >= file_size:
854
                self.offset -= file_size
855
                self.file_index += 1
856
                file_size = self.sizes[self.file_index]
857

    
858
            # Get the block for the current position.
859
            self.block_index = int(self.offset / self.backend.block_size)
860
            if self.block_hash != \
861
                    self.hashmaps[self.file_index][self.block_index]:
862
                self.block_hash = self.hashmaps[
863
                    self.file_index][self.block_index]
864
                try:
865
                    self.block = self.backend.get_block(self.block_hash)
866
                except ItemNotExists:
867
                    raise faults.ItemNotFound('Block does not exist')
868

    
869
            # Get the data from the block.
870
            bo = self.offset % self.backend.block_size
871
            bs = self.backend.block_size
872
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
873
                    self.sizes[self.file_index] % self.backend.block_size):
874
                bs = self.sizes[self.file_index] % self.backend.block_size
875
            bl = min(self.length, bs - bo)
876
            data = self.block[bo:bo + bl]
877
            self.offset += bl
878
            self.length -= bl
879
            return data
880
        else:
881
            raise StopIteration
882

    
883
    def next(self):
884
        if len(self.ranges) == 1:
885
            return self.part_iterator()
886
        if self.range_index == len(self.ranges):
887
            raise StopIteration
888
        try:
889
            if self.range_index == -1:
890
                raise StopIteration
891
            return self.part_iterator()
892
        except StopIteration:
893
            self.range_index += 1
894
            out = []
895
            if self.range_index < len(self.ranges):
896
                # Part header.
897
                self.offset, self.length = self.ranges[self.range_index]
898
                self.file_index = 0
899
                if self.range_index > 0:
900
                    out.append('')
901
                out.append('--' + self.boundary)
902
                out.append('Content-Range: bytes %d-%d/%d' % (
903
                    self.offset, self.offset + self.length - 1, self.size))
904
                out.append('Content-Transfer-Encoding: binary')
905
                out.append('')
906
                out.append('')
907
                return '\r\n'.join(out)
908
            else:
909
                # Footer.
910
                out.append('')
911
                out.append('--' + self.boundary + '--')
912
                out.append('')
913
                return '\r\n'.join(out)
914

    
915

    
916
def object_data_response(request, sizes, hashmaps, meta, public=False):
917
    """Get the HttpResponse object for replying with the object's data."""
918

    
919
    # Range handling.
920
    size = sum(sizes)
921
    ranges = get_range(request, size)
922
    if ranges is None:
923
        ranges = [(0, size)]
924
        ret = 200
925
    else:
926
        check = [True for offset, length in ranges if
927
                 length <= 0 or length > size or
928
                 offset < 0 or offset >= size or
929
                 offset + length > size]
930
        if len(check) > 0:
931
            raise faults.RangeNotSatisfiable(
932
                'Requested range exceeds object limits')
933
        ret = 206
934
        if_range = request.META.get('HTTP_IF_RANGE')
935
        if if_range:
936
            try:
937
                # Modification time has passed instead.
938
                last_modified = parse_http_date(if_range)
939
                if last_modified != meta['modified']:
940
                    ranges = [(0, size)]
941
                    ret = 200
942
            except ValueError:
943
                if if_range != meta['checksum']:
944
                    ranges = [(0, size)]
945
                    ret = 200
946

    
947
    if ret == 206 and len(ranges) > 1:
948
        boundary = uuid.uuid4().hex
949
    else:
950
        boundary = ''
951
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
952
    response = HttpResponse(wrapper, status=ret)
953
    put_object_headers(
954
        response, meta, restricted=public,
955
        token=getattr(request, 'token', None))
956
    if ret == 206:
957
        if len(ranges) == 1:
958
            offset, length = ranges[0]
959
            response[
960
                'Content-Length'] = length  # Update with the correct length.
961
            response['Content-Range'] = 'bytes %d-%d/%d' % (
962
                offset, offset + length - 1, size)
963
        else:
964
            del(response['Content-Length'])
965
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
966
                boundary,)
967
    return response
968

    
969

    
970
def put_object_block(request, hashmap, data, offset):
971
    """Put one block of data at the given offset."""
972

    
973
    bi = int(offset / request.backend.block_size)
974
    bo = offset % request.backend.block_size
975
    bl = min(len(data), request.backend.block_size - bo)
976
    if bi < len(hashmap):
977
        try:
978
            hashmap[bi] = request.backend.update_block(hashmap[bi],
979
                                                       data[:bl], bo)
980
        except IllegalOperationError, e:
981
            raise faults.Forbidden(e[0])
982
    else:
983
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
984
    return bl  # Return ammount of data written.
985

    
986

    
987
def hashmap_md5(backend, hashmap, size):
988
    """Produce the MD5 sum from the data in the hashmap."""
989

    
990
    # TODO: Search backend for the MD5 of another object
991
    #       with the same hashmap and size...
992
    md5 = hashlib.md5()
993
    bs = backend.block_size
994
    for bi, hash in enumerate(hashmap):
995
        data = backend.get_block(hash)  # Blocks come in padded.
996
        if bi == len(hashmap) - 1:
997
            data = data[:size % bs]
998
        md5.update(data)
999
    return md5.hexdigest().lower()
1000

    
1001

    
1002
def simple_list_response(request, l):
1003
    if request.serialization == 'text':
1004
        return '\n'.join(l) + '\n'
1005
    if request.serialization == 'xml':
1006
        return render_to_string('items.xml', {'items': l})
1007
    if request.serialization == 'json':
1008
        return json.dumps(l)
1009

    
1010

    
1011
from pithos.backends.util import PithosBackendPool
1012

    
1013
if RADOS_STORAGE:
1014
    BLOCK_PARAMS = {'mappool': RADOS_POOL_MAPS,
1015
                    'blockpool': RADOS_POOL_BLOCKS, }
1016
else:
1017
    BLOCK_PARAMS = {'mappool': None,
1018
                    'blockpool': None, }
1019

    
1020
BACKEND_KWARGS = dict(
1021
    db_module=BACKEND_DB_MODULE,
1022
    db_connection=BACKEND_DB_CONNECTION,
1023
    block_module=BACKEND_BLOCK_MODULE,
1024
    block_path=BACKEND_BLOCK_PATH,
1025
    block_umask=BACKEND_BLOCK_UMASK,
1026
    block_size=BACKEND_BLOCK_SIZE,
1027
    hash_algorithm=BACKEND_HASH_ALGORITHM,
1028
    queue_module=BACKEND_QUEUE_MODULE,
1029
    queue_hosts=BACKEND_QUEUE_HOSTS,
1030
    queue_exchange=BACKEND_QUEUE_EXCHANGE,
1031
    astakos_auth_url=ASTAKOS_AUTH_URL,
1032
    service_token=SERVICE_TOKEN,
1033
    astakosclient_poolsize=ASTAKOSCLIENT_POOLSIZE,
1034
    free_versioning=BACKEND_FREE_VERSIONING,
1035
    block_params=BLOCK_PARAMS,
1036
    public_url_security=PUBLIC_URL_SECURITY,
1037
    public_url_alphabet=PUBLIC_URL_ALPHABET,
1038
    account_quota_policy=BACKEND_ACCOUNT_QUOTA,
1039
    container_quota_policy=BACKEND_CONTAINER_QUOTA,
1040
    container_versioning_policy=BACKEND_VERSIONING,
1041
    archipelago_conf_file=BACKEND_ARCHIPELAGO_CONF,
1042
    xseg_pool_size=BACKEND_XSEG_POOL_SIZE,
1043
    map_check_interval=BACKEND_MAP_CHECK_INTERVAL)
1044

    
1045
_pithos_backend_pool = PithosBackendPool(size=BACKEND_POOL_SIZE,
1046
                                         **BACKEND_KWARGS)
1047

    
1048

    
1049
def get_backend():
1050
    if BACKEND_POOL_ENABLED:
1051
        backend = _pithos_backend_pool.pool_get()
1052
    else:
1053
        backend = connect_backend(**BACKEND_KWARGS)
1054
    backend.serials = []
1055
    backend.messages = []
1056
    return backend
1057

    
1058

    
1059
def update_request_headers(request):
1060
    # Handle URL-encoded keys and values.
1061
    meta = dict([(
1062
        k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
1063
    for k, v in meta.iteritems():
1064
        try:
1065
            k.decode('ascii')
1066
            v.decode('ascii')
1067
        except UnicodeDecodeError:
1068
            raise faults.BadRequest('Bad character in headers.')
1069
        if '%' in k or '%' in v:
1070
            del(request.META[k])
1071
            request.META[unquote(k)] = smart_unicode(unquote(
1072
                v), strings_only=True)
1073

    
1074

    
1075
def update_response_headers(request, response):
1076
    # URL-encode unicode in headers.
1077
    meta = response.items()
1078
    for k, v in meta:
1079
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
1080
                k.startswith('X-Object-') or k.startswith('Content-')):
1081
            del(response[k])
1082
            response[quote(k)] = quote(v, safe='/=,:@; ')
1083

    
1084

    
1085
def api_method(http_method=None, token_required=True, user_required=True,
1086
               logger=None, format_allowed=False, serializations=None,
1087
               strict_serlization=False, lock_container_path=False):
1088
    serializations = serializations or ['json', 'xml']
1089

    
1090
    def decorator(func):
1091
        @api.api_method(http_method=http_method, token_required=token_required,
1092
                        user_required=user_required,
1093
                        logger=logger, format_allowed=format_allowed,
1094
                        astakos_auth_url=ASTAKOS_AUTH_URL,
1095
                        serializations=serializations,
1096
                        strict_serlization=strict_serlization)
1097
        @wraps(func)
1098
        def wrapper(request, *args, **kwargs):
1099
            # The args variable may contain up to (account, container, object).
1100
            if len(args) > 1 and len(args[1]) > 256:
1101
                raise faults.BadRequest("Container name too large")
1102
            if len(args) > 2 and len(args[2]) > 1024:
1103
                raise faults.BadRequest('Object name too large.')
1104

    
1105
            success_status = False
1106
            try:
1107
                # Add a PithosBackend as attribute of the request object
1108
                request.backend = get_backend()
1109
                request.backend.pre_exec(lock_container_path)
1110

    
1111
                # Many API method expect thet X-Auth-Token in request,token
1112
                request.token = request.x_auth_token
1113
                update_request_headers(request)
1114
                response = func(request, *args, **kwargs)
1115
                update_response_headers(request, response)
1116

    
1117
                success_status = True
1118
                return response
1119
            finally:
1120
                # Always close PithosBackend connection
1121
                if getattr(request, "backend", None) is not None:
1122
                    request.backend.post_exec(success_status)
1123
                    request.backend.close()
1124
        return wrapper
1125
    return decorator
1126

    
1127

    
1128
def restrict_to_host(host=None):
1129
    """
1130
    View decorator which restricts wrapped view to be accessed only under the
1131
    host set. If an invalid host is identified and request HTTP method is one
1132
    of ``GET``, ``HOST``, the decorator will return a redirect response using a
1133
    clone of the request with host replaced to the one the restriction applies
1134
    to.
1135

1136
    e.g.
1137
    @restrict_to_host('files.example.com')
1138
    my_restricted_view(request, path):
1139
        return HttpResponse(file(path).read())
1140

1141
    A get to ``https://api.example.com/my_restricted_view/file_path/?param=1``
1142
    will return a redirect response with Location header set to
1143
    ``https://files.example.com/my_restricted_view/file_path/?param=1``.
1144

1145
    If host is set to ``None`` no restriction will be applied.
1146
    """
1147
    def decorator(func):
1148
        # skip decoration if no host is set
1149
        if not host:
1150
            return func
1151

    
1152
        @wraps(func)
1153
        def wrapper(request, *args, **kwargs):
1154
            request_host = request.get_host()
1155
            if host != request_host:
1156
                proto = 'https' if request.is_secure() else 'http'
1157
                if request.method in ['GET', 'HEAD']:
1158
                    full_path = request.get_full_path()
1159
                    redirect_uri = "%s://%s%s" % (proto, host, full_path)
1160
                    return HttpResponseRedirect(redirect_uri)
1161
                else:
1162
                    raise PermissionDenied
1163
            return func(request, *args, **kwargs)
1164
        return wrapper
1165
    return decorator
1166

    
1167

    
1168
def view_method():
1169
    """Decorator function for views."""
1170

    
1171
    def decorator(func):
1172
        @restrict_to_host(UNSAFE_DOMAIN)
1173
        @wraps(func)
1174
        def wrapper(request, *args, **kwargs):
1175
            if request.method not in ['GET', 'HEAD']:
1176
                return HttpResponseNotAllowed(['GET', 'HEAD'])
1177

    
1178
            try:
1179
                access_token = request.GET.get('access_token')
1180
                requested_resource = text.uenc(request.path.split(VIEW_PREFIX,
1181
                                                                  2)[-1])
1182
                astakos = AstakosClient(SERVICE_TOKEN, ASTAKOS_AUTH_URL,
1183
                                        retry=2, use_pool=True,
1184
                                        logger=logger)
1185
                if access_token is not None:
1186
                    # authenticate using the short-term access token
1187
                    try:
1188
                        request.user = astakos.validate_token(
1189
                            access_token, requested_resource)
1190
                    except AstakosClientException:
1191
                        return HttpResponseRedirect(request.path)
1192
                    request.user_uniq = request.user["access"]["user"]["id"]
1193

    
1194
                    _func = api_method(token_required=False,
1195
                                       user_required=False)(func)
1196
                    response = _func(request, *args, **kwargs)
1197
                    if response.status_code == 404:
1198
                        raise Http404
1199
                    elif response.status_code == 403:
1200
                        raise PermissionDenied
1201
                    return response
1202

    
1203
                client_id, client_secret = OAUTH2_CLIENT_CREDENTIALS
1204
                # TODO: check if client credentials are not set
1205
                authorization_code = request.GET.get('code')
1206
                if authorization_code is None:
1207
                    # request authorization code
1208
                    params = {'response_type': 'code',
1209
                              'client_id': client_id,
1210
                              'redirect_uri':
1211
                              request.build_absolute_uri(request.path),
1212
                              'state': '',  # TODO include state for security
1213
                              'scope': requested_resource}
1214
                    return HttpResponseRedirect('%s?%s' %
1215
                                                (join_urls(astakos.oauth2_url,
1216
                                                           'auth'),
1217
                                                 urlencode(params)))
1218
                else:
1219
                    # request short-term access token
1220
                    redirect_uri = request.build_absolute_uri(request.path)
1221
                    data = astakos.get_token('authorization_code',
1222
                                             *OAUTH2_CLIENT_CREDENTIALS,
1223
                                             redirect_uri=redirect_uri,
1224
                                             scope=requested_resource,
1225
                                             code=authorization_code)
1226
                    params = {'access_token': data.get('access_token', '')}
1227
                    return HttpResponseRedirect('%s?%s' % (redirect_uri,
1228
                                                           urlencode(params)))
1229
            except AstakosClientException, err:
1230
                logger.exception(err)
1231
                raise PermissionDenied
1232
        return wrapper
1233
    return decorator
1234

    
1235

    
1236
class Checksum:
1237
    def __init__(self):
1238
        self.md5 = hashlib.md5()
1239

    
1240
    def update(self, data):
1241
        self.md5.update(data)
1242

    
1243
    def hexdigest(self):
1244
        return self.md5.hexdigest().lower()
1245

    
1246

    
1247
class NoChecksum:
1248
    def update(self, data):
1249
        pass
1250

    
1251
    def hexdigest(self):
1252
        return ''