Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 1d2af25c

History | View | Annotate | Download (45.5 kB)

1
# Copyright 2011-2013 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from datetime import datetime
36
from urllib import quote, unquote, urlencode
37

    
38
from django.http import (HttpResponse, Http404, HttpResponseRedirect,
39
                         HttpResponseNotAllowed)
40
from django.template.loader import render_to_string
41
from django.utils import simplejson as json
42
from django.utils.http import http_date, parse_etags
43
from django.utils.encoding import smart_unicode, smart_str
44
from django.core.files.uploadhandler import FileUploadHandler
45
from django.core.files.uploadedfile import UploadedFile
46
from django.core.urlresolvers import reverse
47
from django.core.exceptions import PermissionDenied
48

    
49
from snf_django.lib.api.parsedate import parse_http_date_safe, parse_http_date
50
from snf_django.lib import api
51
from snf_django.lib.api import faults, utils
52

    
53
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
54
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
55
                                 BACKEND_BLOCK_UMASK,
56
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
57
                                 BACKEND_QUEUE_EXCHANGE,
58
                                 ASTAKOSCLIENT_POOLSIZE,
59
                                 SERVICE_TOKEN,
60
                                 ASTAKOS_AUTH_URL,
61
                                 BACKEND_ACCOUNT_QUOTA,
62
                                 BACKEND_CONTAINER_QUOTA,
63
                                 BACKEND_VERSIONING, BACKEND_FREE_VERSIONING,
64
                                 BACKEND_POOL_ENABLED, BACKEND_POOL_SIZE,
65
                                 BACKEND_BLOCK_SIZE, BACKEND_HASH_ALGORITHM,
66
                                 RADOS_STORAGE, RADOS_POOL_BLOCKS,
67
                                 RADOS_POOL_MAPS, TRANSLATE_UUIDS,
68
                                 PUBLIC_URL_SECURITY, PUBLIC_URL_ALPHABET,
69
                                 BASE_HOST, UPDATE_MD5, VIEW_PREFIX,
70
                                 OAUTH2_CLIENT_CREDENTIALS, UNSAFE_DOMAIN)
71

    
72
from pithos.api.resources import resources
73
from pithos.backends import connect_backend
74
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
75
                                  VersionNotExists)
76

    
77
from synnefo.lib import join_urls
78
from synnefo.util import text
79

    
80
from astakosclient import AstakosClient
81
from astakosclient.errors import NoUserName, NoUUID, AstakosClientException
82

    
83
import logging
84
import re
85
import hashlib
86
import uuid
87
import decimal
88

    
89
logger = logging.getLogger(__name__)
90

    
91

    
92
def json_encode_decimal(obj):
93
    if isinstance(obj, decimal.Decimal):
94
        return str(obj)
95
    raise TypeError(repr(obj) + " is not JSON serializable")
96

    
97

    
98
def rename_meta_key(d, old, new):
99
    if old not in d:
100
        return
101
    d[new] = d[old]
102
    del(d[old])
103

    
104

    
105
def printable_header_dict(d):
106
    """Format a meta dictionary for printing out json/xml.
107

108
    Convert all keys to lower case and replace dashes with underscores.
109
    Format 'last_modified' timestamp.
110
    """
111

    
112
    timestamps = ('last_modified', 'x_container_until_timestamp',
113
                  'x_acount_until_timestamp')
114
    for timestamp in timestamps:
115
        if timestamp in d and d[timestamp]:
116
            d[timestamp] = utils.isoformat(
117
                datetime.fromtimestamp(d[timestamp]))
118
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
119

    
120

    
121
def format_header_key(k):
122
    """Convert underscores to dashes and capitalize intra-dash strings."""
123
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
124

    
125

    
126
def get_header_prefix(request, prefix):
127
    """Get all prefix-* request headers in a dict.
128
       Reformat keys with format_header_key()."""
129

    
130
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
131
    # TODO: Document or remove '~' replacing.
132
    return dict([(format_header_key(k[5:]), v.replace('~', ''))
133
                for k, v in request.META.iteritems()
134
                if k.startswith(prefix) and len(k) > len(prefix)])
135

    
136

    
137
def check_meta_headers(meta):
138
    if len(meta) > 90:
139
        raise faults.BadRequest('Too many headers.')
140
    for k, v in meta.iteritems():
141
        if len(k) > 128:
142
            raise faults.BadRequest('Header name too large.')
143
        if len(v) > 256:
144
            raise faults.BadRequest('Header value too large.')
145

    
146

    
147
def get_account_headers(request):
148
    meta = get_header_prefix(request, 'X-Account-Meta-')
149
    check_meta_headers(meta)
150
    groups = {}
151
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
152
        n = k[16:].lower()
153
        if '-' in n or '_' in n:
154
            raise faults.BadRequest('Bad characters in group name')
155
        groups[n] = v.replace(' ', '').split(',')
156
        while '' in groups[n]:
157
            groups[n].remove('')
158
    return meta, groups
159

    
160

    
161
def put_account_headers(response, meta, groups, policy):
162
    if 'count' in meta:
163
        response['X-Account-Container-Count'] = meta['count']
164
    if 'bytes' in meta:
165
        response['X-Account-Bytes-Used'] = meta['bytes']
166
    response['Last-Modified'] = http_date(int(meta['modified']))
167
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
168
        response[smart_str(
169
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
170
    if 'until_timestamp' in meta:
171
        response['X-Account-Until-Timestamp'] = http_date(
172
            int(meta['until_timestamp']))
173
    for k, v in groups.iteritems():
174
        k = smart_str(k, strings_only=True)
175
        k = format_header_key('X-Account-Group-' + k)
176
        v = smart_str(','.join(v), strings_only=True)
177
        response[k] = v
178
    for k, v in policy.iteritems():
179
        response[smart_str(format_header_key('X-Account-Policy-' + k),
180
                 strings_only=True)] = smart_str(v, strings_only=True)
181

    
182

    
183
def get_container_headers(request):
184
    meta = get_header_prefix(request, 'X-Container-Meta-')
185
    check_meta_headers(meta)
186
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in
187
                  get_header_prefix(request,
188
                                    'X-Container-Policy-').iteritems()])
189
    return meta, policy
190

    
191

    
192
def put_container_headers(request, response, meta, policy):
193
    if 'count' in meta:
194
        response['X-Container-Object-Count'] = meta['count']
195
    if 'bytes' in meta:
196
        response['X-Container-Bytes-Used'] = meta['bytes']
197
    response['Last-Modified'] = http_date(int(meta['modified']))
198
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
199
        response[smart_str(
200
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
201
    l = [smart_str(x, strings_only=True) for x in meta['object_meta']
202
         if x.startswith('X-Object-Meta-')]
203
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
204
    response['X-Container-Block-Size'] = request.backend.block_size
205
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
206
    if 'until_timestamp' in meta:
207
        response['X-Container-Until-Timestamp'] = http_date(
208
            int(meta['until_timestamp']))
209
    for k, v in policy.iteritems():
210
        response[smart_str(format_header_key('X-Container-Policy-' + k),
211
                           strings_only=True)] = smart_str(v,
212
                                                           strings_only=True)
213

    
214

    
215
def get_object_headers(request):
216
    content_type = request.META.get('CONTENT_TYPE', None)
217
    meta = get_header_prefix(request, 'X-Object-Meta-')
218
    check_meta_headers(meta)
219
    if request.META.get('HTTP_CONTENT_ENCODING'):
220
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
221
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
222
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
223
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
224
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
225
    return content_type, meta, get_sharing(request), get_public(request)
226

    
227

    
228
def put_object_headers(response, meta, restricted=False, token=None,
229
                       disposition_type=None):
230
    response['ETag'] = meta['hash'] if not UPDATE_MD5 else meta['checksum']
231
    response['Content-Length'] = meta['bytes']
232
    response.override_serialization = True
233
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
234
    response['Last-Modified'] = http_date(int(meta['modified']))
235
    if not restricted:
236
        response['X-Object-Hash'] = meta['hash']
237
        response['X-Object-UUID'] = meta['uuid']
238
        if TRANSLATE_UUIDS:
239
            meta['modified_by'] = \
240
                retrieve_displayname(token, meta['modified_by'])
241
        response['X-Object-Modified-By'] = smart_str(
242
            meta['modified_by'], strings_only=True)
243
        response['X-Object-Version'] = meta['version']
244
        response['X-Object-Version-Timestamp'] = http_date(
245
            int(meta['version_timestamp']))
246
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
247
            response[smart_str(
248
                k, strings_only=True)] = smart_str(meta[k], strings_only=True)
249
        for k in (
250
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
251
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
252
                'X-Object-Public'):
253
            if k in meta:
254
                response[k] = smart_str(meta[k], strings_only=True)
255
    else:
256
        for k in ('Content-Encoding', 'Content-Disposition'):
257
            if k in meta:
258
                response[k] = smart_str(meta[k], strings_only=True)
259
    disposition_type = disposition_type if disposition_type in \
260
        ('inline', 'attachment') else None
261
    if disposition_type is not None:
262
        response['Content-Disposition'] = '%s; filename=%s' % (
263
            disposition_type, meta['name'])
264

    
265

    
266
def update_manifest_meta(request, v_account, meta):
267
    """Update metadata if the object has an X-Object-Manifest."""
268

    
269
    if 'X-Object-Manifest' in meta:
270
        etag = ''
271
        bytes = 0
272
        try:
273
            src_container, src_name = split_container_object_string(
274
                '/' + meta['X-Object-Manifest'])
275
            objects = request.backend.list_objects(
276
                request.user_uniq, v_account,
277
                src_container, prefix=src_name, virtual=False)
278
            for x in objects:
279
                src_meta = request.backend.get_object_meta(
280
                    request.user_uniq, v_account, src_container, x[0],
281
                    'pithos', x[1])
282
                etag += (src_meta['hash'] if not UPDATE_MD5 else
283
                         src_meta['checksum'])
284
                bytes += src_meta['bytes']
285
        except:
286
            # Ignore errors.
287
            return
288
        meta['bytes'] = bytes
289
        md5 = hashlib.md5()
290
        md5.update(etag)
291
        meta['checksum'] = md5.hexdigest().lower()
292

    
293

    
294
def is_uuid(str):
295
    if str is None:
296
        return False
297
    try:
298
        uuid.UUID(str)
299
    except ValueError:
300
        return False
301
    else:
302
        return True
303

    
304

    
305
##########################
306
# USER CATALOG utilities #
307
##########################
308

    
309
def retrieve_displayname(token, uuid, fail_silently=True):
310
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
311
                            retry=2, use_pool=True,
312
                            logger=logger)
313
    try:
314
        displayname = astakos.get_username(uuid)
315
    except NoUserName:
316
        if not fail_silently:
317
            raise ItemNotExists(uuid)
318
        else:
319
            # just return the uuid
320
            return uuid
321
    return displayname
322

    
323

    
324
def retrieve_displaynames(token, uuids, return_dict=False, fail_silently=True):
325
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
326
                            retry=2, use_pool=True,
327
                            logger=logger)
328
    catalog = astakos.get_usernames(uuids) or {}
329
    missing = list(set(uuids) - set(catalog))
330
    if missing and not fail_silently:
331
        raise ItemNotExists('Unknown displaynames: %s' % ', '.join(missing))
332
    return catalog if return_dict else [catalog.get(i) for i in uuids]
333

    
334

    
335
def retrieve_uuid(token, displayname):
336
    if is_uuid(displayname):
337
        return displayname
338

    
339
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
340
                            retry=2, use_pool=True,
341
                            logger=logger)
342
    try:
343
        uuid = astakos.get_uuid(displayname)
344
    except NoUUID:
345
        raise ItemNotExists(displayname)
346
    return uuid
347

    
348

    
349
def retrieve_uuids(token, displaynames, return_dict=False, fail_silently=True):
350
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
351
                            retry=2, use_pool=True,
352
                            logger=logger)
353
    catalog = astakos.get_uuids(displaynames) or {}
354
    missing = list(set(displaynames) - set(catalog))
355
    if missing and not fail_silently:
356
        raise ItemNotExists('Unknown uuids: %s' % ', '.join(missing))
357
    return catalog if return_dict else [catalog.get(i) for i in displaynames]
358

    
359

    
360
def replace_permissions_displayname(token, holder):
361
    if holder == '*':
362
        return holder
363
    try:
364
        # check first for a group permission
365
        account, group = holder.split(':', 1)
366
    except ValueError:
367
        return retrieve_uuid(token, holder)
368
    else:
369
        return ':'.join([retrieve_uuid(token, account), group])
370

    
371

    
372
def replace_permissions_uuid(token, holder):
373
    if holder == '*':
374
        return holder
375
    try:
376
        # check first for a group permission
377
        account, group = holder.split(':', 1)
378
    except ValueError:
379
        return retrieve_displayname(token, holder)
380
    else:
381
        return ':'.join([retrieve_displayname(token, account), group])
382

    
383

    
384
def update_sharing_meta(request, permissions, v_account,
385
                        v_container, v_object, meta):
386
    if permissions is None:
387
        return
388
    allowed, perm_path, perms = permissions
389
    if len(perms) == 0:
390
        return
391

    
392
    # replace uuid with displayname
393
    if TRANSLATE_UUIDS:
394
        perms['read'] = [replace_permissions_uuid(
395
            getattr(request, 'token', None), x)
396
            for x in perms.get('read', [])]
397
        perms['write'] = [replace_permissions_uuid(
398
            getattr(request, 'token', None), x)
399
            for x in perms.get('write', [])]
400

    
401
    ret = []
402

    
403
    r = ','.join(perms.get('read', []))
404
    if r:
405
        ret.append('read=' + r)
406
    w = ','.join(perms.get('write', []))
407
    if w:
408
        ret.append('write=' + w)
409
    meta['X-Object-Sharing'] = '; '.join(ret)
410
    if '/'.join((v_account, v_container, v_object)) != perm_path:
411
        meta['X-Object-Shared-By'] = perm_path
412
    if request.user_uniq != v_account:
413
        meta['X-Object-Allowed-To'] = allowed
414

    
415

    
416
def update_public_meta(public, meta):
417
    if not public:
418
        return
419
    meta['X-Object-Public'] = join_urls(
420
        BASE_HOST, reverse('pithos.api.public.public_demux', args=(public,)))
421

    
422

    
423
def validate_modification_preconditions(request, meta):
424
    """Check the modified timestamp conforms with the preconditions set."""
425

    
426
    if 'modified' not in meta:
427
        return  # TODO: Always return?
428

    
429
    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
430
    if if_modified_since is not None:
431
        if_modified_since = parse_http_date_safe(if_modified_since)
432
    if (if_modified_since is not None
433
            and int(meta['modified']) <= if_modified_since):
434
        raise faults.NotModified('Resource has not been modified')
435

    
436
    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
437
    if if_unmodified_since is not None:
438
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
439
    if (if_unmodified_since is not None
440
            and int(meta['modified']) > if_unmodified_since):
441
        raise faults.PreconditionFailed('Resource has been modified')
442

    
443

    
444
def validate_matching_preconditions(request, meta):
445
    """Check that the ETag conforms with the preconditions set."""
446

    
447
    etag = meta['hash'] if not UPDATE_MD5 else meta['checksum']
448
    if not etag:
449
        etag = None
450

    
451
    if_match = request.META.get('HTTP_IF_MATCH')
452
    if if_match is not None:
453
        if etag is None:
454
            raise faults.PreconditionFailed('Resource does not exist')
455
        if (if_match != '*'
456
                and etag not in [x.lower() for x in parse_etags(if_match)]):
457
            raise faults.PreconditionFailed('Resource ETag does not match')
458

    
459
    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
460
    if if_none_match is not None:
461
        # TODO: If this passes, must ignore If-Modified-Since header.
462
        if etag is not None:
463
            if (if_none_match == '*' or etag in [x.lower() for x in
464
                                                 parse_etags(if_none_match)]):
465
                # TODO: Continue if an If-Modified-Since header is present.
466
                if request.method in ('HEAD', 'GET'):
467
                    raise faults.NotModified('Resource ETag matches')
468
                raise faults.PreconditionFailed(
469
                    'Resource exists or ETag matches')
470

    
471

    
472
def split_container_object_string(s):
473
    if not len(s) > 0 or s[0] != '/':
474
        raise ValueError
475
    s = s[1:]
476
    pos = s.find('/')
477
    if pos == -1 or pos == len(s) - 1:
478
        raise ValueError
479
    return s[:pos], s[(pos + 1):]
480

    
481

    
482
def copy_or_move_object(request, src_account, src_container, src_name,
483
                        dest_account, dest_container, dest_name,
484
                        move=False, delimiter=None):
485
    """Copy or move an object."""
486

    
487
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
488
        del(request.META['CONTENT_TYPE'])
489
    content_type, meta, permissions, public = get_object_headers(request)
490
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
491
    try:
492
        if move:
493
            version_id = request.backend.move_object(
494
                request.user_uniq, src_account, src_container, src_name,
495
                dest_account, dest_container, dest_name,
496
                content_type, 'pithos', meta, False, permissions, delimiter)
497
        else:
498
            version_id = request.backend.copy_object(
499
                request.user_uniq, src_account, src_container, src_name,
500
                dest_account, dest_container, dest_name,
501
                content_type, 'pithos', meta, False, permissions,
502
                src_version, delimiter)
503
    except NotAllowedError:
504
        raise faults.Forbidden('Not allowed')
505
    except (ItemNotExists, VersionNotExists):
506
        raise faults.ItemNotFound('Container or object does not exist')
507
    except ValueError:
508
        raise faults.BadRequest('Invalid sharing header')
509
    except QuotaError, e:
510
        raise faults.RequestEntityTooLarge('Quota error: %s' % e)
511
    if public is not None:
512
        try:
513
            request.backend.update_object_public(
514
                request.user_uniq, dest_account,
515
                dest_container, dest_name, public)
516
        except NotAllowedError:
517
            raise faults.Forbidden('Not allowed')
518
        except ItemNotExists:
519
            raise faults.ItemNotFound('Object does not exist')
520
    return version_id
521

    
522

    
523
def get_int_parameter(p):
524
    if p is not None:
525
        try:
526
            p = int(p)
527
        except ValueError:
528
            return None
529
        if p < 0:
530
            return None
531
    return p
532

    
533

    
534
def get_content_length(request):
535
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
536
    if content_length is None:
537
        raise faults.LengthRequired('Missing or invalid Content-Length header')
538
    return content_length
539

    
540

    
541
def get_range(request, size):
542
    """Parse a Range header from the request.
543

544
    Either returns None, when the header is not existent or should be ignored,
545
    or a list of (offset, length) tuples - should be further checked.
546
    """
547

    
548
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
549
    if not ranges.startswith('bytes='):
550
        return None
551

    
552
    ret = []
553
    for r in (x.strip() for x in ranges[6:].split(',')):
554
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
555
        m = p.match(r)
556
        if not m:
557
            return None
558
        offset = m.group('offset')
559
        upto = m.group('upto')
560
        if offset == '' and upto == '':
561
            return None
562

    
563
        if offset != '':
564
            offset = int(offset)
565
            if upto != '':
566
                upto = int(upto)
567
                if offset > upto:
568
                    return None
569
                ret.append((offset, upto - offset + 1))
570
            else:
571
                ret.append((offset, size - offset))
572
        else:
573
            length = int(upto)
574
            ret.append((size - length, length))
575

    
576
    return ret
577

    
578

    
579
def get_content_range(request):
580
    """Parse a Content-Range header from the request.
581

582
    Either returns None, when the header is not existent or should be ignored,
583
    or an (offset, length, total) tuple - check as length, total may be None.
584
    Returns (None, None, None) if the provided range is '*/*'.
585
    """
586

    
587
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
588
    if not ranges:
589
        return None
590

    
591
    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
592
    m = p.match(ranges)
593
    if not m:
594
        if ranges == 'bytes */*':
595
            return (None, None, None)
596
        return None
597
    offset = int(m.group('offset'))
598
    upto = m.group('upto')
599
    total = m.group('total')
600
    if upto != '':
601
        upto = int(upto)
602
    else:
603
        upto = None
604
    if total != '*':
605
        total = int(total)
606
    else:
607
        total = None
608
    if (upto is not None and offset > upto) or \
609
        (total is not None and offset >= total) or \
610
            (total is not None and upto is not None and upto >= total):
611
        return None
612

    
613
    if upto is None:
614
        length = None
615
    else:
616
        length = upto - offset + 1
617
    return (offset, length, total)
618

    
619

    
620
def get_sharing(request):
621
    """Parse an X-Object-Sharing header from the request.
622

623
    Raises BadRequest on error.
624
    """
625

    
626
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
627
    if permissions is None:
628
        return None
629

    
630
    # TODO: Document or remove '~' replacing.
631
    permissions = permissions.replace('~', '')
632

    
633
    ret = {}
634
    permissions = permissions.replace(' ', '')
635
    if permissions == '':
636
        return ret
637
    for perm in (x for x in permissions.split(';')):
638
        if perm.startswith('read='):
639
            ret['read'] = list(set(
640
                [v.replace(' ', '').lower() for v in perm[5:].split(',')]))
641
            if '' in ret['read']:
642
                ret['read'].remove('')
643
            if '*' in ret['read']:
644
                ret['read'] = ['*']
645
            if len(ret['read']) == 0:
646
                raise faults.BadRequest(
647
                    'Bad X-Object-Sharing header value: invalid length')
648
        elif perm.startswith('write='):
649
            ret['write'] = list(set(
650
                [v.replace(' ', '').lower() for v in perm[6:].split(',')]))
651
            if '' in ret['write']:
652
                ret['write'].remove('')
653
            if '*' in ret['write']:
654
                ret['write'] = ['*']
655
            if len(ret['write']) == 0:
656
                raise faults.BadRequest(
657
                    'Bad X-Object-Sharing header value: invalid length')
658
        else:
659
            raise faults.BadRequest(
660
                'Bad X-Object-Sharing header value: missing prefix')
661

    
662
    # replace displayname with uuid
663
    if TRANSLATE_UUIDS:
664
        try:
665
            ret['read'] = [replace_permissions_displayname(
666
                getattr(request, 'token', None), x)
667
                for x in ret.get('read', [])]
668
            ret['write'] = [replace_permissions_displayname(
669
                getattr(request, 'token', None), x)
670
                for x in ret.get('write', [])]
671
        except ItemNotExists, e:
672
            raise faults.BadRequest(
673
                'Bad X-Object-Sharing header value: unknown account: %s' % e)
674

    
675
    # Keep duplicates only in write list.
676
    dups = [x for x in ret.get(
677
        'read', []) if x in ret.get('write', []) and x != '*']
678
    if dups:
679
        for x in dups:
680
            ret['read'].remove(x)
681
        if len(ret['read']) == 0:
682
            del(ret['read'])
683

    
684
    return ret
685

    
686

    
687
def get_public(request):
688
    """Parse an X-Object-Public header from the request.
689

690
    Raises BadRequest on error.
691
    """
692

    
693
    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
694
    if public is None:
695
        return None
696

    
697
    public = public.replace(' ', '').lower()
698
    if public == 'true':
699
        return True
700
    elif public == 'false' or public == '':
701
        return False
702
    raise faults.BadRequest('Bad X-Object-Public header value')
703

    
704

    
705
def raw_input_socket(request):
706
    """Return the socket for reading the rest of the request."""
707

    
708
    server_software = request.META.get('SERVER_SOFTWARE')
709
    if server_software and server_software.startswith('mod_python'):
710
        return request._req
711
    if 'wsgi.input' in request.environ:
712
        return request.environ['wsgi.input']
713
    raise NotImplemented('Unknown server software')
714

    
715
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB
716

    
717

    
718
def socket_read_iterator(request, length=0, blocksize=4096):
719
    """Return maximum of blocksize data read from the socket in each iteration
720

721
    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
722
    The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
723
    """
724

    
725
    sock = raw_input_socket(request)
726
    if length < 0:  # Chunked transfers
727
        # Small version (server does the dechunking).
728
        if (request.environ.get('mod_wsgi.input_chunked', None)
729
                or request.META['SERVER_SOFTWARE'].startswith('gunicorn')):
730
            while length < MAX_UPLOAD_SIZE:
731
                data = sock.read(blocksize)
732
                if data == '':
733
                    return
734
                yield data
735
            raise faults.BadRequest('Maximum size is reached')
736

    
737
        # Long version (do the dechunking).
738
        data = ''
739
        while length < MAX_UPLOAD_SIZE:
740
            # Get chunk size.
741
            if hasattr(sock, 'readline'):
742
                chunk_length = sock.readline()
743
            else:
744
                chunk_length = ''
745
                while chunk_length[-1:] != '\n':
746
                    chunk_length += sock.read(1)
747
                chunk_length.strip()
748
            pos = chunk_length.find(';')
749
            if pos >= 0:
750
                chunk_length = chunk_length[:pos]
751
            try:
752
                chunk_length = int(chunk_length, 16)
753
            except Exception:
754
                raise faults.BadRequest('Bad chunk size')
755
                                 # TODO: Change to something more appropriate.
756
            # Check if done.
757
            if chunk_length == 0:
758
                if len(data) > 0:
759
                    yield data
760
                return
761
            # Get the actual data.
762
            while chunk_length > 0:
763
                chunk = sock.read(min(chunk_length, blocksize))
764
                chunk_length -= len(chunk)
765
                if length > 0:
766
                    length += len(chunk)
767
                data += chunk
768
                if len(data) >= blocksize:
769
                    ret = data[:blocksize]
770
                    data = data[blocksize:]
771
                    yield ret
772
            sock.read(2)  # CRLF
773
        raise faults.BadRequest('Maximum size is reached')
774
    else:
775
        if length > MAX_UPLOAD_SIZE:
776
            raise faults.BadRequest('Maximum size is reached')
777
        while length > 0:
778
            data = sock.read(min(length, blocksize))
779
            if not data:
780
                raise faults.BadRequest()
781
            length -= len(data)
782
            yield data
783

    
784

    
785
class SaveToBackendHandler(FileUploadHandler):
786
    """Handle a file from an HTML form the django way."""
787

    
788
    def __init__(self, request=None):
789
        super(SaveToBackendHandler, self).__init__(request)
790
        self.backend = request.backend
791

    
792
    def put_data(self, length):
793
        if len(self.data) >= length:
794
            block = self.data[:length]
795
            self.file.hashmap.append(self.backend.put_block(block))
796
            self.checksum_compute.update(block)
797
            self.data = self.data[length:]
798

    
799
    def new_file(self, field_name, file_name, content_type,
800
                 content_length, charset=None):
801
        self.checksum_compute = NoChecksum() if not UPDATE_MD5 else Checksum()
802
        self.data = ''
803
        self.file = UploadedFile(
804
            name=file_name, content_type=content_type, charset=charset)
805
        self.file.size = 0
806
        self.file.hashmap = []
807

    
808
    def receive_data_chunk(self, raw_data, start):
809
        self.data += raw_data
810
        self.file.size += len(raw_data)
811
        self.put_data(self.request.backend.block_size)
812
        return None
813

    
814
    def file_complete(self, file_size):
815
        l = len(self.data)
816
        if l > 0:
817
            self.put_data(l)
818
        self.file.etag = self.checksum_compute.hexdigest()
819
        return self.file
820

    
821

    
822
class ObjectWrapper(object):
823
    """Return the object's data block-per-block in each iteration.
824

825
    Read from the object using the offset and length provided
826
    in each entry of the range list.
827
    """
828

    
829
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
830
        self.backend = backend
831
        self.ranges = ranges
832
        self.sizes = sizes
833
        self.hashmaps = hashmaps
834
        self.boundary = boundary
835
        self.size = sum(self.sizes)
836

    
837
        self.file_index = 0
838
        self.block_index = 0
839
        self.block_hash = -1
840
        self.block = ''
841

    
842
        self.range_index = -1
843
        self.offset, self.length = self.ranges[0]
844

    
845
    def __iter__(self):
846
        return self
847

    
848
    def part_iterator(self):
849
        if self.length > 0:
850
            # Get the file for the current offset.
851
            file_size = self.sizes[self.file_index]
852
            while self.offset >= file_size:
853
                self.offset -= file_size
854
                self.file_index += 1
855
                file_size = self.sizes[self.file_index]
856

    
857
            # Get the block for the current position.
858
            self.block_index = int(self.offset / self.backend.block_size)
859
            if self.block_hash != \
860
                    self.hashmaps[self.file_index][self.block_index]:
861
                self.block_hash = self.hashmaps[
862
                    self.file_index][self.block_index]
863
                try:
864
                    self.block = self.backend.get_block(self.block_hash)
865
                except ItemNotExists:
866
                    raise faults.ItemNotFound('Block does not exist')
867

    
868
            # Get the data from the block.
869
            bo = self.offset % self.backend.block_size
870
            bs = self.backend.block_size
871
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
872
                    self.sizes[self.file_index] % self.backend.block_size):
873
                bs = self.sizes[self.file_index] % self.backend.block_size
874
            bl = min(self.length, bs - bo)
875
            data = self.block[bo:bo + bl]
876
            self.offset += bl
877
            self.length -= bl
878
            return data
879
        else:
880
            raise StopIteration
881

    
882
    def next(self):
883
        if len(self.ranges) == 1:
884
            return self.part_iterator()
885
        if self.range_index == len(self.ranges):
886
            raise StopIteration
887
        try:
888
            if self.range_index == -1:
889
                raise StopIteration
890
            return self.part_iterator()
891
        except StopIteration:
892
            self.range_index += 1
893
            out = []
894
            if self.range_index < len(self.ranges):
895
                # Part header.
896
                self.offset, self.length = self.ranges[self.range_index]
897
                self.file_index = 0
898
                if self.range_index > 0:
899
                    out.append('')
900
                out.append('--' + self.boundary)
901
                out.append('Content-Range: bytes %d-%d/%d' % (
902
                    self.offset, self.offset + self.length - 1, self.size))
903
                out.append('Content-Transfer-Encoding: binary')
904
                out.append('')
905
                out.append('')
906
                return '\r\n'.join(out)
907
            else:
908
                # Footer.
909
                out.append('')
910
                out.append('--' + self.boundary + '--')
911
                out.append('')
912
                return '\r\n'.join(out)
913

    
914

    
915
def object_data_response(request, sizes, hashmaps, meta, public=False):
916
    """Get the HttpResponse object for replying with the object's data."""
917

    
918
    # Range handling.
919
    size = sum(sizes)
920
    ranges = get_range(request, size)
921
    if ranges is None:
922
        ranges = [(0, size)]
923
        ret = 200
924
    else:
925
        check = [True for offset, length in ranges if
926
                 length <= 0 or length > size or
927
                 offset < 0 or offset >= size or
928
                 offset + length > size]
929
        if len(check) > 0:
930
            raise faults.RangeNotSatisfiable(
931
                'Requested range exceeds object limits')
932
        ret = 206
933
        if_range = request.META.get('HTTP_IF_RANGE')
934
        if if_range:
935
            try:
936
                # Modification time has passed instead.
937
                last_modified = parse_http_date(if_range)
938
                if last_modified != meta['modified']:
939
                    ranges = [(0, size)]
940
                    ret = 200
941
            except ValueError:
942
                if if_range != meta['checksum']:
943
                    ranges = [(0, size)]
944
                    ret = 200
945

    
946
    if ret == 206 and len(ranges) > 1:
947
        boundary = uuid.uuid4().hex
948
    else:
949
        boundary = ''
950
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
951
    response = HttpResponse(wrapper, status=ret)
952
    put_object_headers(
953
        response, meta, restricted=public,
954
        token=getattr(request, 'token', None),
955
        disposition_type=request.GET.get('disposition-type'))
956
    if ret == 206:
957
        if len(ranges) == 1:
958
            offset, length = ranges[0]
959
            response[
960
                'Content-Length'] = length  # Update with the correct length.
961
            response['Content-Range'] = 'bytes %d-%d/%d' % (
962
                offset, offset + length - 1, size)
963
        else:
964
            del(response['Content-Length'])
965
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
966
                boundary,)
967
    return response
968

    
969

    
970
def put_object_block(request, hashmap, data, offset):
971
    """Put one block of data at the given offset."""
972

    
973
    bi = int(offset / request.backend.block_size)
974
    bo = offset % request.backend.block_size
975
    bl = min(len(data), request.backend.block_size - bo)
976
    if bi < len(hashmap):
977
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
978
    else:
979
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
980
    return bl  # Return ammount of data written.
981

    
982

    
983
def hashmap_md5(backend, hashmap, size):
984
    """Produce the MD5 sum from the data in the hashmap."""
985

    
986
    # TODO: Search backend for the MD5 of another object
987
    #       with the same hashmap and size...
988
    md5 = hashlib.md5()
989
    bs = backend.block_size
990
    for bi, hash in enumerate(hashmap):
991
        data = backend.get_block(hash)  # Blocks come in padded.
992
        if bi == len(hashmap) - 1:
993
            data = data[:size % bs]
994
        md5.update(data)
995
    return md5.hexdigest().lower()
996

    
997

    
998
def simple_list_response(request, l):
999
    if request.serialization == 'text':
1000
        return '\n'.join(l) + '\n'
1001
    if request.serialization == 'xml':
1002
        return render_to_string('items.xml', {'items': l})
1003
    if request.serialization == 'json':
1004
        return json.dumps(l)
1005

    
1006

    
1007
from pithos.backends.util import PithosBackendPool
1008

    
1009
if RADOS_STORAGE:
1010
    BLOCK_PARAMS = {'mappool': RADOS_POOL_MAPS,
1011
                    'blockpool': RADOS_POOL_BLOCKS, }
1012
else:
1013
    BLOCK_PARAMS = {'mappool': None,
1014
                    'blockpool': None, }
1015

    
1016
BACKEND_KWARGS = dict(
1017
    db_module=BACKEND_DB_MODULE,
1018
    db_connection=BACKEND_DB_CONNECTION,
1019
    block_module=BACKEND_BLOCK_MODULE,
1020
    block_path=BACKEND_BLOCK_PATH,
1021
    block_umask=BACKEND_BLOCK_UMASK,
1022
    block_size=BACKEND_BLOCK_SIZE,
1023
    hash_algorithm=BACKEND_HASH_ALGORITHM,
1024
    queue_module=BACKEND_QUEUE_MODULE,
1025
    queue_hosts=BACKEND_QUEUE_HOSTS,
1026
    queue_exchange=BACKEND_QUEUE_EXCHANGE,
1027
    astakos_auth_url=ASTAKOS_AUTH_URL,
1028
    service_token=SERVICE_TOKEN,
1029
    astakosclient_poolsize=ASTAKOSCLIENT_POOLSIZE,
1030
    free_versioning=BACKEND_FREE_VERSIONING,
1031
    block_params=BLOCK_PARAMS,
1032
    public_url_security=PUBLIC_URL_SECURITY,
1033
    public_url_alphabet=PUBLIC_URL_ALPHABET,
1034
    account_quota_policy=BACKEND_ACCOUNT_QUOTA,
1035
    container_quota_policy=BACKEND_CONTAINER_QUOTA,
1036
    container_versioning_policy=BACKEND_VERSIONING)
1037

    
1038
_pithos_backend_pool = PithosBackendPool(size=BACKEND_POOL_SIZE,
1039
                                         **BACKEND_KWARGS)
1040

    
1041

    
1042
def get_backend():
1043
    if BACKEND_POOL_ENABLED:
1044
        backend = _pithos_backend_pool.pool_get()
1045
    else:
1046
        backend = connect_backend(**BACKEND_KWARGS)
1047
    backend.serials = []
1048
    backend.messages = []
1049
    return backend
1050

    
1051

    
1052
def update_request_headers(request):
1053
    # Handle URL-encoded keys and values.
1054
    meta = dict([(
1055
        k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
1056
    for k, v in meta.iteritems():
1057
        try:
1058
            k.decode('ascii')
1059
            v.decode('ascii')
1060
        except UnicodeDecodeError:
1061
            raise faults.BadRequest('Bad character in headers.')
1062
        if '%' in k or '%' in v:
1063
            del(request.META[k])
1064
            request.META[unquote(k)] = smart_unicode(unquote(
1065
                v), strings_only=True)
1066

    
1067

    
1068
def update_response_headers(request, response):
1069
    # URL-encode unicode in headers.
1070
    meta = response.items()
1071
    for k, v in meta:
1072
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
1073
                k.startswith('X-Object-') or k.startswith('Content-')):
1074
            del(response[k])
1075
            response[quote(k)] = quote(v, safe='/=,:@; ')
1076

    
1077

    
1078
def api_method(http_method=None, token_required=True, user_required=True,
1079
               logger=None, format_allowed=False, serializations=None,
1080
               strict_serlization=False, lock_container_path=False):
1081
    serializations = serializations or ['json', 'xml']
1082

    
1083
    def decorator(func):
1084
        @api.api_method(http_method=http_method, token_required=token_required,
1085
                        user_required=user_required,
1086
                        logger=logger, format_allowed=format_allowed,
1087
                        astakos_auth_url=ASTAKOS_AUTH_URL,
1088
                        serializations=serializations,
1089
                        strict_serlization=strict_serlization)
1090
        @wraps(func)
1091
        def wrapper(request, *args, **kwargs):
1092
            # The args variable may contain up to (account, container, object).
1093
            if len(args) > 1 and len(args[1]) > 256:
1094
                raise faults.BadRequest("Container name too large")
1095
            if len(args) > 2 and len(args[2]) > 1024:
1096
                raise faults.BadRequest('Object name too large.')
1097

    
1098
            success_status = False
1099
            try:
1100
                # Add a PithosBackend as attribute of the request object
1101
                request.backend = get_backend()
1102
                request.backend.pre_exec(lock_container_path)
1103

    
1104
                # Many API method expect thet X-Auth-Token in request,token
1105
                request.token = request.x_auth_token
1106
                update_request_headers(request)
1107
                response = func(request, *args, **kwargs)
1108
                update_response_headers(request, response)
1109

    
1110
                success_status = True
1111
                return response
1112
            finally:
1113
                # Always close PithosBackend connection
1114
                if getattr(request, "backend", None) is not None:
1115
                    request.backend.post_exec(success_status)
1116
                    request.backend.close()
1117
        return wrapper
1118
    return decorator
1119

    
1120

    
1121
def restrict_to_host(host=None):
1122
    """
1123
    View decorator which restricts wrapped view to be accessed only under the
1124
    host set. If an invalid host is identified and request HTTP method is one
1125
    of ``GET``, ``HOST``, the decorator will return a redirect response using a
1126
    clone of the request with host replaced to the one the restriction applies
1127
    to.
1128

1129
    e.g.
1130
    @restrict_to_host('files.example.com')
1131
    my_restricted_view(request, path):
1132
        return HttpResponse(file(path).read())
1133

1134
    A get to ``https://api.example.com/my_restricted_view/file_path/?param=1``
1135
    will return a redirect response with Location header set to
1136
    ``https://files.example.com/my_restricted_view/file_path/?param=1``.
1137

1138
    If host is set to ``None`` no restriction will be applied.
1139
    """
1140
    def decorator(func):
1141
        # skip decoration if no host is set
1142
        if not host:
1143
            return func
1144

    
1145
        @wraps(func)
1146
        def wrapper(request, *args, **kwargs):
1147
            request_host = request.get_host()
1148
            if host != request_host:
1149
                proto = 'https' if request.is_secure() else 'http'
1150
                if request.method in ['GET', 'HEAD']:
1151
                    full_path = request.get_full_path()
1152
                    redirect_uri = "%s://%s%s" % (proto, host, full_path)
1153
                    return HttpResponseRedirect(redirect_uri)
1154
                else:
1155
                    raise PermissionDenied
1156
            return func(request, *args, **kwargs)
1157
        return wrapper
1158
    return decorator
1159

    
1160

    
1161
def view_method():
1162
    """Decorator function for views."""
1163

    
1164
    def decorator(func):
1165
        @restrict_to_host(UNSAFE_DOMAIN)
1166
        @wraps(func)
1167
        def wrapper(request, *args, **kwargs):
1168
            if request.method not in ['GET', 'HEAD']:
1169
                return HttpResponseNotAllowed(['GET', 'HEAD'])
1170

    
1171
            try:
1172
                access_token = request.GET.get('access_token')
1173
                requested_resource = text.uenc(request.path.split(VIEW_PREFIX,
1174
                                                                  2)[-1])
1175
                astakos = AstakosClient(SERVICE_TOKEN, ASTAKOS_AUTH_URL,
1176
                                        retry=2, use_pool=True,
1177
                                        logger=logger)
1178
                if access_token is not None:
1179
                    # authenticate using the short-term access token
1180
                    try:
1181
                        request.user = astakos.validate_token(
1182
                            access_token, requested_resource)
1183
                    except AstakosClientException:
1184
                        return HttpResponseRedirect(request.path)
1185
                    request.user_uniq = request.user["access"]["user"]["id"]
1186

    
1187
                    _func = api_method(token_required=False,
1188
                                       user_required=False)(func)
1189
                    response = _func(request, *args, **kwargs)
1190
                    if response.status_code == 404:
1191
                        raise Http404
1192
                    elif response.status_code == 403:
1193
                        raise PermissionDenied
1194
                    return response
1195

    
1196
                client_id, client_secret = OAUTH2_CLIENT_CREDENTIALS
1197
                # TODO: check if client credentials are not set
1198
                authorization_code = request.GET.get('code')
1199
                if authorization_code is None:
1200
                    # request authorization code
1201
                    params = {'response_type': 'code',
1202
                              'client_id': client_id,
1203
                              'redirect_uri':
1204
                              request.build_absolute_uri(request.path),
1205
                              'state': '',  # TODO include state for security
1206
                              'scope': requested_resource}
1207
                    return HttpResponseRedirect('%s?%s' %
1208
                                                (join_urls(astakos.oauth2_url,
1209
                                                           'auth'),
1210
                                                 urlencode(params)))
1211
                else:
1212
                    # request short-term access token
1213
                    redirect_uri = request.build_absolute_uri(request.path)
1214
                    data = astakos.get_token('authorization_code',
1215
                                             *OAUTH2_CLIENT_CREDENTIALS,
1216
                                             redirect_uri=redirect_uri,
1217
                                             scope=requested_resource,
1218
                                             code=authorization_code)
1219
                    params = {'access_token': data.get('access_token', '')}
1220
                    return HttpResponseRedirect('%s?%s' % (redirect_uri,
1221
                                                           urlencode(params)))
1222
            except AstakosClientException, err:
1223
                logger.exception(err)
1224
                raise PermissionDenied
1225
        return wrapper
1226
    return decorator
1227

    
1228

    
1229
class Checksum:
1230
    def __init__(self):
1231
        self.md5 = hashlib.md5()
1232

    
1233
    def update(self, data):
1234
        self.md5.update(data)
1235

    
1236
    def hexdigest(self):
1237
        return self.md5.hexdigest().lower()
1238

    
1239

    
1240
class NoChecksum:
1241
    def update(self, data):
1242
        pass
1243

    
1244
    def hexdigest(self):
1245
        return ''