Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 8c7e1398

History | View | Annotate | Download (46 kB)

1
# Copyright 2011-2013 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from datetime import datetime
36
from urllib import quote, unquote, urlencode
37
from urlparse import urlunsplit, urlsplit, parse_qsl
38

    
39
from django.http import (HttpResponse, Http404, HttpResponseRedirect,
40
                         HttpResponseNotAllowed)
41
from django.template.loader import render_to_string
42
from django.utils import simplejson as json
43
from django.utils.http import http_date, parse_etags
44
from django.utils.encoding import smart_unicode, smart_str
45
from django.core.files.uploadhandler import FileUploadHandler
46
from django.core.files.uploadedfile import UploadedFile
47
from django.core.urlresolvers import reverse
48
from django.core.exceptions import PermissionDenied
49

    
50
from snf_django.lib.api.parsedate import parse_http_date_safe, parse_http_date
51
from snf_django.lib import api
52
from snf_django.lib.api import faults, utils
53

    
54
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
55
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
56
                                 BACKEND_BLOCK_UMASK,
57
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
58
                                 BACKEND_QUEUE_EXCHANGE,
59
                                 ASTAKOSCLIENT_POOLSIZE,
60
                                 SERVICE_TOKEN,
61
                                 ASTAKOS_AUTH_URL,
62
                                 BACKEND_ACCOUNT_QUOTA,
63
                                 BACKEND_CONTAINER_QUOTA,
64
                                 BACKEND_VERSIONING, BACKEND_FREE_VERSIONING,
65
                                 BACKEND_POOL_ENABLED, BACKEND_POOL_SIZE,
66
                                 BACKEND_BLOCK_SIZE, BACKEND_HASH_ALGORITHM,
67
                                 RADOS_STORAGE, RADOS_POOL_BLOCKS,
68
                                 RADOS_POOL_MAPS, TRANSLATE_UUIDS,
69
                                 PUBLIC_URL_SECURITY, PUBLIC_URL_ALPHABET,
70
                                 BASE_HOST, UPDATE_MD5, VIEW_PREFIX,
71
                                 OAUTH2_CLIENT_CREDENTIALS, UNSAFE_DOMAIN)
72

    
73
from pithos.api.resources import resources
74
from pithos.backends import connect_backend
75
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
76
                                  VersionNotExists)
77

    
78
from synnefo.lib import join_urls
79
from synnefo.util import text
80

    
81
from astakosclient import AstakosClient
82
from astakosclient.errors import NoUserName, NoUUID, AstakosClientException
83

    
84
import logging
85
import re
86
import hashlib
87
import uuid
88
import decimal
89

    
90
logger = logging.getLogger(__name__)
91

    
92

    
93
def json_encode_decimal(obj):
94
    if isinstance(obj, decimal.Decimal):
95
        return str(obj)
96
    raise TypeError(repr(obj) + " is not JSON serializable")
97

    
98

    
99
def rename_meta_key(d, old, new):
100
    if old not in d:
101
        return
102
    d[new] = d[old]
103
    del(d[old])
104

    
105

    
106
def printable_header_dict(d):
107
    """Format a meta dictionary for printing out json/xml.
108

109
    Convert all keys to lower case and replace dashes with underscores.
110
    Format 'last_modified' timestamp.
111
    """
112

    
113
    timestamps = ('last_modified', 'x_container_until_timestamp',
114
                  'x_acount_until_timestamp')
115
    for timestamp in timestamps:
116
        if timestamp in d and d[timestamp]:
117
            d[timestamp] = utils.isoformat(
118
                datetime.fromtimestamp(d[timestamp]))
119
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
120

    
121

    
122
def format_header_key(k):
123
    """Convert underscores to dashes and capitalize intra-dash strings."""
124
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
125

    
126

    
127
def get_header_prefix(request, prefix):
128
    """Get all prefix-* request headers in a dict.
129
       Reformat keys with format_header_key()."""
130

    
131
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
132
    # TODO: Document or remove '~' replacing.
133
    return dict([(format_header_key(k[5:]), v.replace('~', ''))
134
                for k, v in request.META.iteritems()
135
                if k.startswith(prefix) and len(k) > len(prefix)])
136

    
137

    
138
def check_meta_headers(meta):
139
    if len(meta) > 90:
140
        raise faults.BadRequest('Too many headers.')
141
    for k, v in meta.iteritems():
142
        if len(k) > 128:
143
            raise faults.BadRequest('Header name too large.')
144
        if len(v) > 256:
145
            raise faults.BadRequest('Header value too large.')
146

    
147

    
148
def get_account_headers(request):
149
    meta = get_header_prefix(request, 'X-Account-Meta-')
150
    check_meta_headers(meta)
151
    groups = {}
152
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
153
        n = k[16:].lower()
154
        if '-' in n or '_' in n:
155
            raise faults.BadRequest('Bad characters in group name')
156
        groups[n] = v.replace(' ', '').split(',')
157
        while '' in groups[n]:
158
            groups[n].remove('')
159
    return meta, groups
160

    
161

    
162
def put_account_headers(response, meta, groups, policy):
163
    if 'count' in meta:
164
        response['X-Account-Container-Count'] = meta['count']
165
    if 'bytes' in meta:
166
        response['X-Account-Bytes-Used'] = meta['bytes']
167
    response['Last-Modified'] = http_date(int(meta['modified']))
168
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
169
        response[smart_str(
170
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
171
    if 'until_timestamp' in meta:
172
        response['X-Account-Until-Timestamp'] = http_date(
173
            int(meta['until_timestamp']))
174
    for k, v in groups.iteritems():
175
        k = smart_str(k, strings_only=True)
176
        k = format_header_key('X-Account-Group-' + k)
177
        v = smart_str(','.join(v), strings_only=True)
178
        response[k] = v
179
    for k, v in policy.iteritems():
180
        response[smart_str(format_header_key('X-Account-Policy-' + k),
181
                 strings_only=True)] = smart_str(v, strings_only=True)
182

    
183

    
184
def get_container_headers(request):
185
    meta = get_header_prefix(request, 'X-Container-Meta-')
186
    check_meta_headers(meta)
187
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in
188
                  get_header_prefix(request,
189
                                    'X-Container-Policy-').iteritems()])
190
    return meta, policy
191

    
192

    
193
def put_container_headers(request, response, meta, policy):
194
    if 'count' in meta:
195
        response['X-Container-Object-Count'] = meta['count']
196
    if 'bytes' in meta:
197
        response['X-Container-Bytes-Used'] = meta['bytes']
198
    response['Last-Modified'] = http_date(int(meta['modified']))
199
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
200
        response[smart_str(
201
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
202
    l = [smart_str(x, strings_only=True) for x in meta['object_meta']
203
         if x.startswith('X-Object-Meta-')]
204
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
205
    response['X-Container-Block-Size'] = request.backend.block_size
206
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
207
    if 'until_timestamp' in meta:
208
        response['X-Container-Until-Timestamp'] = http_date(
209
            int(meta['until_timestamp']))
210
    for k, v in policy.iteritems():
211
        response[smart_str(format_header_key('X-Container-Policy-' + k),
212
                           strings_only=True)] = smart_str(v,
213
                                                           strings_only=True)
214

    
215

    
216
def get_object_headers(request):
217
    content_type = request.META.get('CONTENT_TYPE', None)
218
    meta = get_header_prefix(request, 'X-Object-Meta-')
219
    check_meta_headers(meta)
220
    if request.META.get('HTTP_CONTENT_ENCODING'):
221
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
222
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
223
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
224
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
225
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
226
    return content_type, meta, get_sharing(request), get_public(request)
227

    
228

    
229
def put_object_headers(response, meta, restricted=False, token=None,
230
                       disposition_type=None):
231
    response['ETag'] = meta['hash'] if not UPDATE_MD5 else meta['checksum']
232
    response['Content-Length'] = meta['bytes']
233
    response.override_serialization = True
234
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
235
    response['Last-Modified'] = http_date(int(meta['modified']))
236
    if not restricted:
237
        response['X-Object-Hash'] = meta['hash']
238
        response['X-Object-UUID'] = meta['uuid']
239
        if TRANSLATE_UUIDS:
240
            meta['modified_by'] = \
241
                retrieve_displayname(token, meta['modified_by'])
242
        response['X-Object-Modified-By'] = smart_str(
243
            meta['modified_by'], strings_only=True)
244
        response['X-Object-Version'] = meta['version']
245
        response['X-Object-Version-Timestamp'] = http_date(
246
            int(meta['version_timestamp']))
247
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
248
            response[smart_str(
249
                k, strings_only=True)] = smart_str(meta[k], strings_only=True)
250
        for k in (
251
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
252
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
253
                'X-Object-Public'):
254
            if k in meta:
255
                response[k] = smart_str(meta[k], strings_only=True)
256
    else:
257
        for k in ('Content-Encoding', 'Content-Disposition'):
258
            if k in meta:
259
                response[k] = smart_str(meta[k], strings_only=True)
260
    disposition_type = disposition_type if disposition_type in \
261
        ('inline', 'attachment') else None
262
    if disposition_type is not None:
263
        response['Content-Disposition'] = '%s; filename=%s' % (
264
            disposition_type, meta['name'])
265

    
266

    
267
def update_manifest_meta(request, v_account, meta):
268
    """Update metadata if the object has an X-Object-Manifest."""
269

    
270
    if 'X-Object-Manifest' in meta:
271
        etag = ''
272
        bytes = 0
273
        try:
274
            src_container, src_name = split_container_object_string(
275
                '/' + meta['X-Object-Manifest'])
276
            objects = request.backend.list_objects(
277
                request.user_uniq, v_account,
278
                src_container, prefix=src_name, virtual=False)
279
            for x in objects:
280
                src_meta = request.backend.get_object_meta(
281
                    request.user_uniq, v_account, src_container, x[0],
282
                    'pithos', x[1])
283
                etag += (src_meta['hash'] if not UPDATE_MD5 else
284
                         src_meta['checksum'])
285
                bytes += src_meta['bytes']
286
        except:
287
            # Ignore errors.
288
            return
289
        meta['bytes'] = bytes
290
        md5 = hashlib.md5()
291
        md5.update(etag)
292
        meta['checksum'] = md5.hexdigest().lower()
293

    
294

    
295
def is_uuid(str):
296
    if str is None:
297
        return False
298
    try:
299
        uuid.UUID(str)
300
    except ValueError:
301
        return False
302
    else:
303
        return True
304

    
305

    
306
##########################
307
# USER CATALOG utilities #
308
##########################
309

    
310
def retrieve_displayname(token, uuid, fail_silently=True):
311
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
312
                            retry=2, use_pool=True,
313
                            logger=logger)
314
    try:
315
        displayname = astakos.get_username(uuid)
316
    except NoUserName:
317
        if not fail_silently:
318
            raise ItemNotExists(uuid)
319
        else:
320
            # just return the uuid
321
            return uuid
322
    return displayname
323

    
324

    
325
def retrieve_displaynames(token, uuids, return_dict=False, fail_silently=True):
326
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
327
                            retry=2, use_pool=True,
328
                            logger=logger)
329
    catalog = astakos.get_usernames(uuids) or {}
330
    missing = list(set(uuids) - set(catalog))
331
    if missing and not fail_silently:
332
        raise ItemNotExists('Unknown displaynames: %s' % ', '.join(missing))
333
    return catalog if return_dict else [catalog.get(i) for i in uuids]
334

    
335

    
336
def retrieve_uuid(token, displayname):
337
    if is_uuid(displayname):
338
        return displayname
339

    
340
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
341
                            retry=2, use_pool=True,
342
                            logger=logger)
343
    try:
344
        uuid = astakos.get_uuid(displayname)
345
    except NoUUID:
346
        raise ItemNotExists(displayname)
347
    return uuid
348

    
349

    
350
def retrieve_uuids(token, displaynames, return_dict=False, fail_silently=True):
351
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
352
                            retry=2, use_pool=True,
353
                            logger=logger)
354
    catalog = astakos.get_uuids(displaynames) or {}
355
    missing = list(set(displaynames) - set(catalog))
356
    if missing and not fail_silently:
357
        raise ItemNotExists('Unknown uuids: %s' % ', '.join(missing))
358
    return catalog if return_dict else [catalog.get(i) for i in displaynames]
359

    
360

    
361
def replace_permissions_displayname(token, holder):
362
    if holder == '*':
363
        return holder
364
    try:
365
        # check first for a group permission
366
        account, group = holder.split(':', 1)
367
    except ValueError:
368
        return retrieve_uuid(token, holder)
369
    else:
370
        return ':'.join([retrieve_uuid(token, account), group])
371

    
372

    
373
def replace_permissions_uuid(token, holder):
374
    if holder == '*':
375
        return holder
376
    try:
377
        # check first for a group permission
378
        account, group = holder.split(':', 1)
379
    except ValueError:
380
        return retrieve_displayname(token, holder)
381
    else:
382
        return ':'.join([retrieve_displayname(token, account), group])
383

    
384

    
385
def update_sharing_meta(request, permissions, v_account,
386
                        v_container, v_object, meta):
387
    if permissions is None:
388
        return
389
    allowed, perm_path, perms = permissions
390
    if len(perms) == 0:
391
        return
392

    
393
    # replace uuid with displayname
394
    if TRANSLATE_UUIDS:
395
        perms['read'] = [replace_permissions_uuid(
396
            getattr(request, 'token', None), x)
397
            for x in perms.get('read', [])]
398
        perms['write'] = [replace_permissions_uuid(
399
            getattr(request, 'token', None), x)
400
            for x in perms.get('write', [])]
401

    
402
    ret = []
403

    
404
    r = ','.join(perms.get('read', []))
405
    if r:
406
        ret.append('read=' + r)
407
    w = ','.join(perms.get('write', []))
408
    if w:
409
        ret.append('write=' + w)
410
    meta['X-Object-Sharing'] = '; '.join(ret)
411
    if '/'.join((v_account, v_container, v_object)) != perm_path:
412
        meta['X-Object-Shared-By'] = perm_path
413
    if request.user_uniq != v_account:
414
        meta['X-Object-Allowed-To'] = allowed
415

    
416

    
417
def update_public_meta(public, meta):
418
    if not public:
419
        return
420
    meta['X-Object-Public'] = join_urls(
421
        BASE_HOST, reverse('pithos.api.public.public_demux', args=(public,)))
422

    
423

    
424
def validate_modification_preconditions(request, meta):
425
    """Check the modified timestamp conforms with the preconditions set."""
426

    
427
    if 'modified' not in meta:
428
        return  # TODO: Always return?
429

    
430
    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
431
    if if_modified_since is not None:
432
        if_modified_since = parse_http_date_safe(if_modified_since)
433
    if (if_modified_since is not None
434
            and int(meta['modified']) <= if_modified_since):
435
        raise faults.NotModified('Resource has not been modified')
436

    
437
    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
438
    if if_unmodified_since is not None:
439
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
440
    if (if_unmodified_since is not None
441
            and int(meta['modified']) > if_unmodified_since):
442
        raise faults.PreconditionFailed('Resource has been modified')
443

    
444

    
445
def validate_matching_preconditions(request, meta):
446
    """Check that the ETag conforms with the preconditions set."""
447

    
448
    etag = meta['hash'] if not UPDATE_MD5 else meta['checksum']
449
    if not etag:
450
        etag = None
451

    
452
    if_match = request.META.get('HTTP_IF_MATCH')
453
    if if_match is not None:
454
        if etag is None:
455
            raise faults.PreconditionFailed('Resource does not exist')
456
        if (if_match != '*'
457
                and etag not in [x.lower() for x in parse_etags(if_match)]):
458
            raise faults.PreconditionFailed('Resource ETag does not match')
459

    
460
    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
461
    if if_none_match is not None:
462
        # TODO: If this passes, must ignore If-Modified-Since header.
463
        if etag is not None:
464
            if (if_none_match == '*' or etag in [x.lower() for x in
465
                                                 parse_etags(if_none_match)]):
466
                # TODO: Continue if an If-Modified-Since header is present.
467
                if request.method in ('HEAD', 'GET'):
468
                    raise faults.NotModified('Resource ETag matches')
469
                raise faults.PreconditionFailed(
470
                    'Resource exists or ETag matches')
471

    
472

    
473
def split_container_object_string(s):
474
    if not len(s) > 0 or s[0] != '/':
475
        raise ValueError
476
    s = s[1:]
477
    pos = s.find('/')
478
    if pos == -1 or pos == len(s) - 1:
479
        raise ValueError
480
    return s[:pos], s[(pos + 1):]
481

    
482

    
483
def copy_or_move_object(request, src_account, src_container, src_name,
484
                        dest_account, dest_container, dest_name,
485
                        move=False, delimiter=None):
486
    """Copy or move an object."""
487

    
488
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
489
        del(request.META['CONTENT_TYPE'])
490
    content_type, meta, permissions, public = get_object_headers(request)
491
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
492
    try:
493
        if move:
494
            version_id = request.backend.move_object(
495
                request.user_uniq, src_account, src_container, src_name,
496
                dest_account, dest_container, dest_name,
497
                content_type, 'pithos', meta, False, permissions, delimiter)
498
        else:
499
            version_id = request.backend.copy_object(
500
                request.user_uniq, src_account, src_container, src_name,
501
                dest_account, dest_container, dest_name,
502
                content_type, 'pithos', meta, False, permissions,
503
                src_version, delimiter)
504
    except NotAllowedError:
505
        raise faults.Forbidden('Not allowed')
506
    except (ItemNotExists, VersionNotExists):
507
        raise faults.ItemNotFound('Container or object does not exist')
508
    except ValueError:
509
        raise faults.BadRequest('Invalid sharing header')
510
    except QuotaError, e:
511
        raise faults.RequestEntityTooLarge('Quota error: %s' % e)
512
    if public is not None:
513
        try:
514
            request.backend.update_object_public(
515
                request.user_uniq, dest_account,
516
                dest_container, dest_name, public)
517
        except NotAllowedError:
518
            raise faults.Forbidden('Not allowed')
519
        except ItemNotExists:
520
            raise faults.ItemNotFound('Object does not exist')
521
    return version_id
522

    
523

    
524
def get_int_parameter(p):
525
    if p is not None:
526
        try:
527
            p = int(p)
528
        except ValueError:
529
            return None
530
        if p < 0:
531
            return None
532
    return p
533

    
534

    
535
def get_content_length(request):
536
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
537
    if content_length is None:
538
        raise faults.LengthRequired('Missing or invalid Content-Length header')
539
    return content_length
540

    
541

    
542
def get_range(request, size):
543
    """Parse a Range header from the request.
544

545
    Either returns None, when the header is not existent or should be ignored,
546
    or a list of (offset, length) tuples - should be further checked.
547
    """
548

    
549
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
550
    if not ranges.startswith('bytes='):
551
        return None
552

    
553
    ret = []
554
    for r in (x.strip() for x in ranges[6:].split(',')):
555
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
556
        m = p.match(r)
557
        if not m:
558
            return None
559
        offset = m.group('offset')
560
        upto = m.group('upto')
561
        if offset == '' and upto == '':
562
            return None
563

    
564
        if offset != '':
565
            offset = int(offset)
566
            if upto != '':
567
                upto = int(upto)
568
                if offset > upto:
569
                    return None
570
                ret.append((offset, upto - offset + 1))
571
            else:
572
                ret.append((offset, size - offset))
573
        else:
574
            length = int(upto)
575
            ret.append((size - length, length))
576

    
577
    return ret
578

    
579

    
580
def get_content_range(request):
581
    """Parse a Content-Range header from the request.
582

583
    Either returns None, when the header is not existent or should be ignored,
584
    or an (offset, length, total) tuple - check as length, total may be None.
585
    Returns (None, None, None) if the provided range is '*/*'.
586
    """
587

    
588
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
589
    if not ranges:
590
        return None
591

    
592
    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
593
    m = p.match(ranges)
594
    if not m:
595
        if ranges == 'bytes */*':
596
            return (None, None, None)
597
        return None
598
    offset = int(m.group('offset'))
599
    upto = m.group('upto')
600
    total = m.group('total')
601
    if upto != '':
602
        upto = int(upto)
603
    else:
604
        upto = None
605
    if total != '*':
606
        total = int(total)
607
    else:
608
        total = None
609
    if (upto is not None and offset > upto) or \
610
        (total is not None and offset >= total) or \
611
            (total is not None and upto is not None and upto >= total):
612
        return None
613

    
614
    if upto is None:
615
        length = None
616
    else:
617
        length = upto - offset + 1
618
    return (offset, length, total)
619

    
620

    
621
def get_sharing(request):
622
    """Parse an X-Object-Sharing header from the request.
623

624
    Raises BadRequest on error.
625
    """
626

    
627
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
628
    if permissions is None:
629
        return None
630

    
631
    # TODO: Document or remove '~' replacing.
632
    permissions = permissions.replace('~', '')
633

    
634
    ret = {}
635
    permissions = permissions.replace(' ', '')
636
    if permissions == '':
637
        return ret
638
    for perm in (x for x in permissions.split(';')):
639
        if perm.startswith('read='):
640
            ret['read'] = list(set(
641
                [v.replace(' ', '').lower() for v in perm[5:].split(',')]))
642
            if '' in ret['read']:
643
                ret['read'].remove('')
644
            if '*' in ret['read']:
645
                ret['read'] = ['*']
646
            if len(ret['read']) == 0:
647
                raise faults.BadRequest(
648
                    'Bad X-Object-Sharing header value: invalid length')
649
        elif perm.startswith('write='):
650
            ret['write'] = list(set(
651
                [v.replace(' ', '').lower() for v in perm[6:].split(',')]))
652
            if '' in ret['write']:
653
                ret['write'].remove('')
654
            if '*' in ret['write']:
655
                ret['write'] = ['*']
656
            if len(ret['write']) == 0:
657
                raise faults.BadRequest(
658
                    'Bad X-Object-Sharing header value: invalid length')
659
        else:
660
            raise faults.BadRequest(
661
                'Bad X-Object-Sharing header value: missing prefix')
662

    
663
    # replace displayname with uuid
664
    if TRANSLATE_UUIDS:
665
        try:
666
            ret['read'] = [replace_permissions_displayname(
667
                getattr(request, 'token', None), x)
668
                for x in ret.get('read', [])]
669
            ret['write'] = [replace_permissions_displayname(
670
                getattr(request, 'token', None), x)
671
                for x in ret.get('write', [])]
672
        except ItemNotExists, e:
673
            raise faults.BadRequest(
674
                'Bad X-Object-Sharing header value: unknown account: %s' % e)
675

    
676
    # Keep duplicates only in write list.
677
    dups = [x for x in ret.get(
678
        'read', []) if x in ret.get('write', []) and x != '*']
679
    if dups:
680
        for x in dups:
681
            ret['read'].remove(x)
682
        if len(ret['read']) == 0:
683
            del(ret['read'])
684

    
685
    return ret
686

    
687

    
688
def get_public(request):
689
    """Parse an X-Object-Public header from the request.
690

691
    Raises BadRequest on error.
692
    """
693

    
694
    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
695
    if public is None:
696
        return None
697

    
698
    public = public.replace(' ', '').lower()
699
    if public == 'true':
700
        return True
701
    elif public == 'false' or public == '':
702
        return False
703
    raise faults.BadRequest('Bad X-Object-Public header value')
704

    
705

    
706
def raw_input_socket(request):
707
    """Return the socket for reading the rest of the request."""
708

    
709
    server_software = request.META.get('SERVER_SOFTWARE')
710
    if server_software and server_software.startswith('mod_python'):
711
        return request._req
712
    if 'wsgi.input' in request.environ:
713
        return request.environ['wsgi.input']
714
    raise NotImplemented('Unknown server software')
715

    
716
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB
717

    
718

    
719
def socket_read_iterator(request, length=0, blocksize=4096):
720
    """Return maximum of blocksize data read from the socket in each iteration
721

722
    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
723
    The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
724
    """
725

    
726
    sock = raw_input_socket(request)
727
    if length < 0:  # Chunked transfers
728
        # Small version (server does the dechunking).
729
        if (request.environ.get('mod_wsgi.input_chunked', None)
730
                or request.META['SERVER_SOFTWARE'].startswith('gunicorn')):
731
            while length < MAX_UPLOAD_SIZE:
732
                data = sock.read(blocksize)
733
                if data == '':
734
                    return
735
                yield data
736
            raise faults.BadRequest('Maximum size is reached')
737

    
738
        # Long version (do the dechunking).
739
        data = ''
740
        while length < MAX_UPLOAD_SIZE:
741
            # Get chunk size.
742
            if hasattr(sock, 'readline'):
743
                chunk_length = sock.readline()
744
            else:
745
                chunk_length = ''
746
                while chunk_length[-1:] != '\n':
747
                    chunk_length += sock.read(1)
748
                chunk_length.strip()
749
            pos = chunk_length.find(';')
750
            if pos >= 0:
751
                chunk_length = chunk_length[:pos]
752
            try:
753
                chunk_length = int(chunk_length, 16)
754
            except Exception:
755
                raise faults.BadRequest('Bad chunk size')
756
                                 # TODO: Change to something more appropriate.
757
            # Check if done.
758
            if chunk_length == 0:
759
                if len(data) > 0:
760
                    yield data
761
                return
762
            # Get the actual data.
763
            while chunk_length > 0:
764
                chunk = sock.read(min(chunk_length, blocksize))
765
                chunk_length -= len(chunk)
766
                if length > 0:
767
                    length += len(chunk)
768
                data += chunk
769
                if len(data) >= blocksize:
770
                    ret = data[:blocksize]
771
                    data = data[blocksize:]
772
                    yield ret
773
            sock.read(2)  # CRLF
774
        raise faults.BadRequest('Maximum size is reached')
775
    else:
776
        if length > MAX_UPLOAD_SIZE:
777
            raise faults.BadRequest('Maximum size is reached')
778
        while length > 0:
779
            data = sock.read(min(length, blocksize))
780
            if not data:
781
                raise faults.BadRequest()
782
            length -= len(data)
783
            yield data
784

    
785

    
786
class SaveToBackendHandler(FileUploadHandler):
787
    """Handle a file from an HTML form the django way."""
788

    
789
    def __init__(self, request=None):
790
        super(SaveToBackendHandler, self).__init__(request)
791
        self.backend = request.backend
792

    
793
    def put_data(self, length):
794
        if len(self.data) >= length:
795
            block = self.data[:length]
796
            self.file.hashmap.append(self.backend.put_block(block))
797
            self.checksum_compute.update(block)
798
            self.data = self.data[length:]
799

    
800
    def new_file(self, field_name, file_name, content_type,
801
                 content_length, charset=None):
802
        self.checksum_compute = NoChecksum() if not UPDATE_MD5 else Checksum()
803
        self.data = ''
804
        self.file = UploadedFile(
805
            name=file_name, content_type=content_type, charset=charset)
806
        self.file.size = 0
807
        self.file.hashmap = []
808

    
809
    def receive_data_chunk(self, raw_data, start):
810
        self.data += raw_data
811
        self.file.size += len(raw_data)
812
        self.put_data(self.request.backend.block_size)
813
        return None
814

    
815
    def file_complete(self, file_size):
816
        l = len(self.data)
817
        if l > 0:
818
            self.put_data(l)
819
        self.file.etag = self.checksum_compute.hexdigest()
820
        return self.file
821

    
822

    
823
class ObjectWrapper(object):
824
    """Return the object's data block-per-block in each iteration.
825

826
    Read from the object using the offset and length provided
827
    in each entry of the range list.
828
    """
829

    
830
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
831
        self.backend = backend
832
        self.ranges = ranges
833
        self.sizes = sizes
834
        self.hashmaps = hashmaps
835
        self.boundary = boundary
836
        self.size = sum(self.sizes)
837

    
838
        self.file_index = 0
839
        self.block_index = 0
840
        self.block_hash = -1
841
        self.block = ''
842

    
843
        self.range_index = -1
844
        self.offset, self.length = self.ranges[0]
845

    
846
    def __iter__(self):
847
        return self
848

    
849
    def part_iterator(self):
850
        if self.length > 0:
851
            # Get the file for the current offset.
852
            file_size = self.sizes[self.file_index]
853
            while self.offset >= file_size:
854
                self.offset -= file_size
855
                self.file_index += 1
856
                file_size = self.sizes[self.file_index]
857

    
858
            # Get the block for the current position.
859
            self.block_index = int(self.offset / self.backend.block_size)
860
            if self.block_hash != \
861
                    self.hashmaps[self.file_index][self.block_index]:
862
                self.block_hash = self.hashmaps[
863
                    self.file_index][self.block_index]
864
                try:
865
                    self.block = self.backend.get_block(self.block_hash)
866
                except ItemNotExists:
867
                    raise faults.ItemNotFound('Block does not exist')
868

    
869
            # Get the data from the block.
870
            bo = self.offset % self.backend.block_size
871
            bs = self.backend.block_size
872
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
873
                    self.sizes[self.file_index] % self.backend.block_size):
874
                bs = self.sizes[self.file_index] % self.backend.block_size
875
            bl = min(self.length, bs - bo)
876
            data = self.block[bo:bo + bl]
877
            self.offset += bl
878
            self.length -= bl
879
            return data
880
        else:
881
            raise StopIteration
882

    
883
    def next(self):
884
        if len(self.ranges) == 1:
885
            return self.part_iterator()
886
        if self.range_index == len(self.ranges):
887
            raise StopIteration
888
        try:
889
            if self.range_index == -1:
890
                raise StopIteration
891
            return self.part_iterator()
892
        except StopIteration:
893
            self.range_index += 1
894
            out = []
895
            if self.range_index < len(self.ranges):
896
                # Part header.
897
                self.offset, self.length = self.ranges[self.range_index]
898
                self.file_index = 0
899
                if self.range_index > 0:
900
                    out.append('')
901
                out.append('--' + self.boundary)
902
                out.append('Content-Range: bytes %d-%d/%d' % (
903
                    self.offset, self.offset + self.length - 1, self.size))
904
                out.append('Content-Transfer-Encoding: binary')
905
                out.append('')
906
                out.append('')
907
                return '\r\n'.join(out)
908
            else:
909
                # Footer.
910
                out.append('')
911
                out.append('--' + self.boundary + '--')
912
                out.append('')
913
                return '\r\n'.join(out)
914

    
915

    
916
def object_data_response(request, sizes, hashmaps, meta, public=False):
917
    """Get the HttpResponse object for replying with the object's data."""
918

    
919
    # Range handling.
920
    size = sum(sizes)
921
    ranges = get_range(request, size)
922
    if ranges is None:
923
        ranges = [(0, size)]
924
        ret = 200
925
    else:
926
        check = [True for offset, length in ranges if
927
                 length <= 0 or length > size or
928
                 offset < 0 or offset >= size or
929
                 offset + length > size]
930
        if len(check) > 0:
931
            raise faults.RangeNotSatisfiable(
932
                'Requested range exceeds object limits')
933
        ret = 206
934
        if_range = request.META.get('HTTP_IF_RANGE')
935
        if if_range:
936
            try:
937
                # Modification time has passed instead.
938
                last_modified = parse_http_date(if_range)
939
                if last_modified != meta['modified']:
940
                    ranges = [(0, size)]
941
                    ret = 200
942
            except ValueError:
943
                if if_range != meta['checksum']:
944
                    ranges = [(0, size)]
945
                    ret = 200
946

    
947
    if ret == 206 and len(ranges) > 1:
948
        boundary = uuid.uuid4().hex
949
    else:
950
        boundary = ''
951
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
952
    response = HttpResponse(wrapper, status=ret)
953
    put_object_headers(
954
        response, meta, restricted=public,
955
        token=getattr(request, 'token', None),
956
        disposition_type=request.GET.get('disposition-type'))
957
    if ret == 206:
958
        if len(ranges) == 1:
959
            offset, length = ranges[0]
960
            response[
961
                'Content-Length'] = length  # Update with the correct length.
962
            response['Content-Range'] = 'bytes %d-%d/%d' % (
963
                offset, offset + length - 1, size)
964
        else:
965
            del(response['Content-Length'])
966
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
967
                boundary,)
968
    return response
969

    
970

    
971
def put_object_block(request, hashmap, data, offset):
972
    """Put one block of data at the given offset."""
973

    
974
    bi = int(offset / request.backend.block_size)
975
    bo = offset % request.backend.block_size
976
    bl = min(len(data), request.backend.block_size - bo)
977
    if bi < len(hashmap):
978
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
979
    else:
980
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
981
    return bl  # Return ammount of data written.
982

    
983

    
984
def hashmap_md5(backend, hashmap, size):
985
    """Produce the MD5 sum from the data in the hashmap."""
986

    
987
    # TODO: Search backend for the MD5 of another object
988
    #       with the same hashmap and size...
989
    md5 = hashlib.md5()
990
    bs = backend.block_size
991
    for bi, hash in enumerate(hashmap):
992
        data = backend.get_block(hash)  # Blocks come in padded.
993
        if bi == len(hashmap) - 1:
994
            data = data[:size % bs]
995
        md5.update(data)
996
    return md5.hexdigest().lower()
997

    
998

    
999
def simple_list_response(request, l):
1000
    if request.serialization == 'text':
1001
        return '\n'.join(l) + '\n'
1002
    if request.serialization == 'xml':
1003
        return render_to_string('items.xml', {'items': l})
1004
    if request.serialization == 'json':
1005
        return json.dumps(l)
1006

    
1007

    
1008
from pithos.backends.util import PithosBackendPool
1009

    
1010
if RADOS_STORAGE:
1011
    BLOCK_PARAMS = {'mappool': RADOS_POOL_MAPS,
1012
                    'blockpool': RADOS_POOL_BLOCKS, }
1013
else:
1014
    BLOCK_PARAMS = {'mappool': None,
1015
                    'blockpool': None, }
1016

    
1017
BACKEND_KWARGS = dict(
1018
    db_module=BACKEND_DB_MODULE,
1019
    db_connection=BACKEND_DB_CONNECTION,
1020
    block_module=BACKEND_BLOCK_MODULE,
1021
    block_path=BACKEND_BLOCK_PATH,
1022
    block_umask=BACKEND_BLOCK_UMASK,
1023
    block_size=BACKEND_BLOCK_SIZE,
1024
    hash_algorithm=BACKEND_HASH_ALGORITHM,
1025
    queue_module=BACKEND_QUEUE_MODULE,
1026
    queue_hosts=BACKEND_QUEUE_HOSTS,
1027
    queue_exchange=BACKEND_QUEUE_EXCHANGE,
1028
    astakos_auth_url=ASTAKOS_AUTH_URL,
1029
    service_token=SERVICE_TOKEN,
1030
    astakosclient_poolsize=ASTAKOSCLIENT_POOLSIZE,
1031
    free_versioning=BACKEND_FREE_VERSIONING,
1032
    block_params=BLOCK_PARAMS,
1033
    public_url_security=PUBLIC_URL_SECURITY,
1034
    public_url_alphabet=PUBLIC_URL_ALPHABET,
1035
    account_quota_policy=BACKEND_ACCOUNT_QUOTA,
1036
    container_quota_policy=BACKEND_CONTAINER_QUOTA,
1037
    container_versioning_policy=BACKEND_VERSIONING)
1038

    
1039
_pithos_backend_pool = PithosBackendPool(size=BACKEND_POOL_SIZE,
1040
                                         **BACKEND_KWARGS)
1041

    
1042

    
1043
def get_backend():
1044
    if BACKEND_POOL_ENABLED:
1045
        backend = _pithos_backend_pool.pool_get()
1046
    else:
1047
        backend = connect_backend(**BACKEND_KWARGS)
1048
    backend.serials = []
1049
    backend.messages = []
1050
    return backend
1051

    
1052

    
1053
def update_request_headers(request):
1054
    # Handle URL-encoded keys and values.
1055
    meta = dict([(
1056
        k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
1057
    for k, v in meta.iteritems():
1058
        try:
1059
            k.decode('ascii')
1060
            v.decode('ascii')
1061
        except UnicodeDecodeError:
1062
            raise faults.BadRequest('Bad character in headers.')
1063
        if '%' in k or '%' in v:
1064
            del(request.META[k])
1065
            request.META[unquote(k)] = smart_unicode(unquote(
1066
                v), strings_only=True)
1067

    
1068

    
1069
def update_response_headers(request, response):
1070
    # URL-encode unicode in headers.
1071
    meta = response.items()
1072
    for k, v in meta:
1073
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
1074
                k.startswith('X-Object-') or k.startswith('Content-')):
1075
            del(response[k])
1076
            response[quote(k)] = quote(v, safe='/=,:@; ')
1077

    
1078

    
1079
def api_method(http_method=None, token_required=True, user_required=True,
1080
               logger=None, format_allowed=False, serializations=None,
1081
               strict_serlization=False, lock_container_path=False):
1082
    serializations = serializations or ['json', 'xml']
1083

    
1084
    def decorator(func):
1085
        @api.api_method(http_method=http_method, token_required=token_required,
1086
                        user_required=user_required,
1087
                        logger=logger, format_allowed=format_allowed,
1088
                        astakos_auth_url=ASTAKOS_AUTH_URL,
1089
                        serializations=serializations,
1090
                        strict_serlization=strict_serlization)
1091
        @wraps(func)
1092
        def wrapper(request, *args, **kwargs):
1093
            # The args variable may contain up to (account, container, object).
1094
            if len(args) > 1 and len(args[1]) > 256:
1095
                raise faults.BadRequest("Container name too large")
1096
            if len(args) > 2 and len(args[2]) > 1024:
1097
                raise faults.BadRequest('Object name too large.')
1098

    
1099
            success_status = False
1100
            try:
1101
                # Add a PithosBackend as attribute of the request object
1102
                request.backend = get_backend()
1103
                request.backend.pre_exec(lock_container_path)
1104

    
1105
                # Many API method expect thet X-Auth-Token in request,token
1106
                request.token = request.x_auth_token
1107
                update_request_headers(request)
1108
                response = func(request, *args, **kwargs)
1109
                update_response_headers(request, response)
1110

    
1111
                success_status = True
1112
                return response
1113
            finally:
1114
                # Always close PithosBackend connection
1115
                if getattr(request, "backend", None) is not None:
1116
                    request.backend.post_exec(success_status)
1117
                    request.backend.close()
1118
        return wrapper
1119
    return decorator
1120

    
1121

    
1122
def restrict_to_host(host=None):
1123
    """
1124
    View decorator which restricts wrapped view to be accessed only under the
1125
    host set. If an invalid host is identified and request HTTP method is one
1126
    of ``GET``, ``HOST``, the decorator will return a redirect response using a
1127
    clone of the request with host replaced to the one the restriction applies
1128
    to.
1129

1130
    e.g.
1131
    @restrict_to_host('files.example.com')
1132
    my_restricted_view(request, path):
1133
        return HttpResponse(file(path).read())
1134

1135
    A get to ``https://api.example.com/my_restricted_view/file_path/?param=1``
1136
    will return a redirect response with Location header set to
1137
    ``https://files.example.com/my_restricted_view/file_path/?param=1``.
1138

1139
    If host is set to ``None`` no restriction will be applied.
1140
    """
1141
    def decorator(func):
1142
        # skip decoration if no host is set
1143
        if not host:
1144
            return func
1145

    
1146
        @wraps(func)
1147
        def wrapper(request, *args, **kwargs):
1148
            request_host = request.get_host()
1149
            if host != request_host:
1150
                proto = 'https' if request.is_secure() else 'http'
1151
                if request.method in ['GET', 'HEAD']:
1152
                    full_path = request.get_full_path()
1153
                    redirect_uri = "%s://%s%s" % (proto, host, full_path)
1154
                    return HttpResponseRedirect(redirect_uri)
1155
                else:
1156
                    raise PermissionDenied
1157
            return func(request, *args, **kwargs)
1158
        return wrapper
1159
    return decorator
1160

    
1161

    
1162
def view_method():
1163
    """Decorator function for views."""
1164

    
1165
    def decorator(func):
1166
        @restrict_to_host(UNSAFE_DOMAIN)
1167
        @wraps(func)
1168
        def wrapper(request, *args, **kwargs):
1169
            if request.method not in ['GET', 'HEAD']:
1170
                return HttpResponseNotAllowed(['GET', 'HEAD'])
1171

    
1172
            try:
1173
                access_token = request.GET.get('access_token')
1174
                requested_resource = text.uenc(request.path.split(VIEW_PREFIX,
1175
                                                                  2)[-1])
1176
                astakos = AstakosClient(SERVICE_TOKEN, ASTAKOS_AUTH_URL,
1177
                                        retry=2, use_pool=True,
1178
                                        logger=logger)
1179
                if access_token is not None:
1180
                    # authenticate using the short-term access token
1181
                    try:
1182
                        request.user = astakos.validate_token(
1183
                            access_token, requested_resource)
1184
                    except AstakosClientException:
1185
                        return HttpResponseRedirect(request.path)
1186
                    request.user_uniq = request.user["access"]["user"]["id"]
1187

    
1188
                    _func = api_method(token_required=False,
1189
                                       user_required=False)(func)
1190
                    response = _func(request, *args, **kwargs)
1191
                    if response.status_code == 404:
1192
                        raise Http404
1193
                    elif response.status_code == 403:
1194
                        raise PermissionDenied
1195
                    return response
1196

    
1197
                client_id, client_secret = OAUTH2_CLIENT_CREDENTIALS
1198
                # TODO: check if client credentials are not set
1199
                authorization_code = request.GET.get('code')
1200
                redirect_uri = unquote(request.build_absolute_uri(
1201
                    request.get_full_path()))
1202
                if authorization_code is None:
1203
                    # request authorization code
1204
                    params = {'response_type': 'code',
1205
                              'client_id': client_id,
1206
                              'redirect_uri': redirect_uri,
1207
                              'state': '',  # TODO include state for security
1208
                              'scope': requested_resource}
1209
                    return HttpResponseRedirect('%s?%s' %
1210
                                                (join_urls(astakos.oauth2_url,
1211
                                                           'auth'),
1212
                                                 urlencode(params)))
1213
                else:
1214
                    # request short-term access token
1215
                    parts = list(urlsplit(redirect_uri))
1216
                    params = dict(parse_qsl(parts[3], keep_blank_values=True))
1217
                    if 'code' in params:  # always True
1218
                        del params['code']
1219
                    if 'state' in params:
1220
                        del params['state']
1221
                    parts[3] = urlencode(params)
1222
                    redirect_uri = urlunsplit(parts)
1223
                    data = astakos.get_token('authorization_code',
1224
                                             *OAUTH2_CLIENT_CREDENTIALS,
1225
                                             redirect_uri=redirect_uri,
1226
                                             scope=requested_resource,
1227
                                             code=authorization_code)
1228
                    params['access_token'] = data.get('access_token', '')
1229
                    parts[3] = urlencode(params)
1230
                    redirect_uri = urlunsplit(parts)
1231
                    return HttpResponseRedirect(redirect_uri)
1232
            except AstakosClientException, err:
1233
                logger.exception(err)
1234
                raise PermissionDenied
1235
        return wrapper
1236
    return decorator
1237

    
1238

    
1239
class Checksum:
1240
    def __init__(self):
1241
        self.md5 = hashlib.md5()
1242

    
1243
    def update(self, data):
1244
        self.md5.update(data)
1245

    
1246
    def hexdigest(self):
1247
        return self.md5.hexdigest().lower()
1248

    
1249

    
1250
class NoChecksum:
1251
    def update(self, data):
1252
        pass
1253

    
1254
    def hexdigest(self):
1255
        return ''