Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ bfa0341f

History | View | Annotate | Download (45.2 kB)

1
# Copyright 2011-2013 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from datetime import datetime
36
from urllib import quote, unquote, urlencode
37

    
38
from django.http import (HttpResponse, Http404, HttpResponseRedirect,
39
                         HttpResponseNotAllowed)
40
from django.template.loader import render_to_string
41
from django.utils import simplejson as json
42
from django.utils.http import http_date, parse_etags
43
from django.utils.encoding import smart_unicode, smart_str
44
from django.core.files.uploadhandler import FileUploadHandler
45
from django.core.files.uploadedfile import UploadedFile
46
from django.core.urlresolvers import reverse
47
from django.core.exceptions import PermissionDenied
48

    
49
from snf_django.lib.api.parsedate import parse_http_date_safe, parse_http_date
50
from snf_django.lib import api
51
from snf_django.lib.api import faults, utils
52

    
53
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
54
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
55
                                 BACKEND_BLOCK_UMASK,
56
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
57
                                 BACKEND_QUEUE_EXCHANGE,
58
                                 ASTAKOSCLIENT_POOLSIZE,
59
                                 SERVICE_TOKEN,
60
                                 ASTAKOS_AUTH_URL,
61
                                 BACKEND_ACCOUNT_QUOTA,
62
                                 BACKEND_CONTAINER_QUOTA,
63
                                 BACKEND_VERSIONING, BACKEND_FREE_VERSIONING,
64
                                 BACKEND_POOL_ENABLED, BACKEND_POOL_SIZE,
65
                                 BACKEND_BLOCK_SIZE, BACKEND_HASH_ALGORITHM,
66
                                 RADOS_STORAGE, RADOS_POOL_BLOCKS,
67
                                 RADOS_POOL_MAPS, TRANSLATE_UUIDS,
68
                                 PUBLIC_URL_SECURITY, PUBLIC_URL_ALPHABET,
69
                                 BASE_HOST, UPDATE_MD5, VIEW_PREFIX,
70
                                 OAUTH2_CLIENT_CREDENTIALS, UNSAFE_DOMAIN)
71

    
72
from pithos.api.resources import resources
73
from pithos.backends import connect_backend
74
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
75
                                  VersionNotExists)
76

    
77
from synnefo.lib import join_urls
78
from synnefo.util import text
79

    
80
from astakosclient import AstakosClient
81
from astakosclient.errors import NoUserName, NoUUID, AstakosClientException
82

    
83
import logging
84
import re
85
import hashlib
86
import uuid
87
import decimal
88

    
89
logger = logging.getLogger(__name__)
90

    
91

    
92
def json_encode_decimal(obj):
93
    if isinstance(obj, decimal.Decimal):
94
        return str(obj)
95
    raise TypeError(repr(obj) + " is not JSON serializable")
96

    
97

    
98
def rename_meta_key(d, old, new):
99
    if old not in d:
100
        return
101
    d[new] = d[old]
102
    del(d[old])
103

    
104

    
105
def printable_header_dict(d):
106
    """Format a meta dictionary for printing out json/xml.
107

108
    Convert all keys to lower case and replace dashes with underscores.
109
    Format 'last_modified' timestamp.
110
    """
111

    
112
    timestamps = ('last_modified', 'x_container_until_timestamp',
113
                  'x_acount_until_timestamp')
114
    for timestamp in timestamps:
115
        if timestamp in d and d[timestamp]:
116
            d[timestamp] = utils.isoformat(
117
                datetime.fromtimestamp(d[timestamp]))
118
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
119

    
120

    
121
def format_header_key(k):
122
    """Convert underscores to dashes and capitalize intra-dash strings."""
123
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
124

    
125

    
126
def get_header_prefix(request, prefix):
127
    """Get all prefix-* request headers in a dict.
128
       Reformat keys with format_header_key()."""
129

    
130
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
131
    # TODO: Document or remove '~' replacing.
132
    return dict([(format_header_key(k[5:]), v.replace('~', ''))
133
                for k, v in request.META.iteritems()
134
                if k.startswith(prefix) and len(k) > len(prefix)])
135

    
136

    
137
def check_meta_headers(meta):
138
    if len(meta) > 90:
139
        raise faults.BadRequest('Too many headers.')
140
    for k, v in meta.iteritems():
141
        if len(k) > 128:
142
            raise faults.BadRequest('Header name too large.')
143
        if len(v) > 256:
144
            raise faults.BadRequest('Header value too large.')
145

    
146

    
147
def get_account_headers(request):
148
    meta = get_header_prefix(request, 'X-Account-Meta-')
149
    check_meta_headers(meta)
150
    groups = {}
151
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
152
        n = k[16:].lower()
153
        if '-' in n or '_' in n:
154
            raise faults.BadRequest('Bad characters in group name')
155
        groups[n] = v.replace(' ', '').split(',')
156
        while '' in groups[n]:
157
            groups[n].remove('')
158
    return meta, groups
159

    
160

    
161
def put_account_headers(response, meta, groups, policy):
162
    if 'count' in meta:
163
        response['X-Account-Container-Count'] = meta['count']
164
    if 'bytes' in meta:
165
        response['X-Account-Bytes-Used'] = meta['bytes']
166
    response['Last-Modified'] = http_date(int(meta['modified']))
167
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
168
        response[smart_str(
169
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
170
    if 'until_timestamp' in meta:
171
        response['X-Account-Until-Timestamp'] = http_date(
172
            int(meta['until_timestamp']))
173
    for k, v in groups.iteritems():
174
        k = smart_str(k, strings_only=True)
175
        k = format_header_key('X-Account-Group-' + k)
176
        v = smart_str(','.join(v), strings_only=True)
177
        response[k] = v
178
    for k, v in policy.iteritems():
179
        response[smart_str(format_header_key('X-Account-Policy-' + k),
180
                 strings_only=True)] = smart_str(v, strings_only=True)
181

    
182

    
183
def get_container_headers(request):
184
    meta = get_header_prefix(request, 'X-Container-Meta-')
185
    check_meta_headers(meta)
186
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in
187
                  get_header_prefix(request,
188
                                    'X-Container-Policy-').iteritems()])
189
    return meta, policy
190

    
191

    
192
def put_container_headers(request, response, meta, policy):
193
    if 'count' in meta:
194
        response['X-Container-Object-Count'] = meta['count']
195
    if 'bytes' in meta:
196
        response['X-Container-Bytes-Used'] = meta['bytes']
197
    response['Last-Modified'] = http_date(int(meta['modified']))
198
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
199
        response[smart_str(
200
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
201
    l = [smart_str(x, strings_only=True) for x in meta['object_meta']
202
         if x.startswith('X-Object-Meta-')]
203
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
204
    response['X-Container-Block-Size'] = request.backend.block_size
205
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
206
    if 'until_timestamp' in meta:
207
        response['X-Container-Until-Timestamp'] = http_date(
208
            int(meta['until_timestamp']))
209
    for k, v in policy.iteritems():
210
        response[smart_str(format_header_key('X-Container-Policy-' + k),
211
                           strings_only=True)] = smart_str(v,
212
                                                           strings_only=True)
213

    
214

    
215
def get_object_headers(request):
216
    content_type = request.META.get('CONTENT_TYPE', None)
217
    meta = get_header_prefix(request, 'X-Object-Meta-')
218
    check_meta_headers(meta)
219
    if request.META.get('HTTP_CONTENT_ENCODING'):
220
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
221
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
222
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
223
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
224
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
225
    return content_type, meta, get_sharing(request), get_public(request)
226

    
227

    
228
def put_object_headers(response, meta, restricted=False, token=None):
229
    response['ETag'] = meta['hash'] if not UPDATE_MD5 else meta['checksum']
230
    response['Content-Length'] = meta['bytes']
231
    response.override_serialization = True
232
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
233
    response['Last-Modified'] = http_date(int(meta['modified']))
234
    if not restricted:
235
        response['X-Object-Hash'] = meta['hash']
236
        response['X-Object-UUID'] = meta['uuid']
237
        if TRANSLATE_UUIDS:
238
            meta['modified_by'] = \
239
                retrieve_displayname(token, meta['modified_by'])
240
        response['X-Object-Modified-By'] = smart_str(
241
            meta['modified_by'], strings_only=True)
242
        response['X-Object-Version'] = meta['version']
243
        response['X-Object-Version-Timestamp'] = http_date(
244
            int(meta['version_timestamp']))
245
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
246
            response[smart_str(
247
                k, strings_only=True)] = smart_str(meta[k], strings_only=True)
248
        for k in (
249
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
250
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
251
                'X-Object-Public'):
252
            if k in meta:
253
                response[k] = smart_str(meta[k], strings_only=True)
254
    else:
255
        for k in ('Content-Encoding', 'Content-Disposition'):
256
            if k in meta:
257
                response[k] = smart_str(meta[k], strings_only=True)
258

    
259

    
260
def update_manifest_meta(request, v_account, meta):
261
    """Update metadata if the object has an X-Object-Manifest."""
262

    
263
    if 'X-Object-Manifest' in meta:
264
        etag = ''
265
        bytes = 0
266
        try:
267
            src_container, src_name = split_container_object_string(
268
                '/' + meta['X-Object-Manifest'])
269
            objects = request.backend.list_objects(
270
                request.user_uniq, v_account,
271
                src_container, prefix=src_name, virtual=False)
272
            for x in objects:
273
                src_meta = request.backend.get_object_meta(
274
                    request.user_uniq, v_account, src_container, x[0],
275
                    'pithos', x[1])
276
                etag += (src_meta['hash'] if not UPDATE_MD5 else
277
                         src_meta['checksum'])
278
                bytes += src_meta['bytes']
279
        except:
280
            # Ignore errors.
281
            return
282
        meta['bytes'] = bytes
283
        md5 = hashlib.md5()
284
        md5.update(etag)
285
        meta['checksum'] = md5.hexdigest().lower()
286

    
287

    
288
def is_uuid(str):
289
    if str is None:
290
        return False
291
    try:
292
        uuid.UUID(str)
293
    except ValueError:
294
        return False
295
    else:
296
        return True
297

    
298

    
299
##########################
300
# USER CATALOG utilities #
301
##########################
302

    
303
def retrieve_displayname(token, uuid, fail_silently=True):
304
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
305
                            retry=2, use_pool=True,
306
                            logger=logger)
307
    try:
308
        displayname = astakos.get_username(uuid)
309
    except NoUserName:
310
        if not fail_silently:
311
            raise ItemNotExists(uuid)
312
        else:
313
            # just return the uuid
314
            return uuid
315
    return displayname
316

    
317

    
318
def retrieve_displaynames(token, uuids, return_dict=False, fail_silently=True):
319
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
320
                            retry=2, use_pool=True,
321
                            logger=logger)
322
    catalog = astakos.get_usernames(uuids) or {}
323
    missing = list(set(uuids) - set(catalog))
324
    if missing and not fail_silently:
325
        raise ItemNotExists('Unknown displaynames: %s' % ', '.join(missing))
326
    return catalog if return_dict else [catalog.get(i) for i in uuids]
327

    
328

    
329
def retrieve_uuid(token, displayname):
330
    if is_uuid(displayname):
331
        return displayname
332

    
333
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
334
                            retry=2, use_pool=True,
335
                            logger=logger)
336
    try:
337
        uuid = astakos.get_uuid(displayname)
338
    except NoUUID:
339
        raise ItemNotExists(displayname)
340
    return uuid
341

    
342

    
343
def retrieve_uuids(token, displaynames, return_dict=False, fail_silently=True):
344
    astakos = AstakosClient(token, ASTAKOS_AUTH_URL,
345
                            retry=2, use_pool=True,
346
                            logger=logger)
347
    catalog = astakos.get_uuids(displaynames) or {}
348
    missing = list(set(displaynames) - set(catalog))
349
    if missing and not fail_silently:
350
        raise ItemNotExists('Unknown uuids: %s' % ', '.join(missing))
351
    return catalog if return_dict else [catalog.get(i) for i in displaynames]
352

    
353

    
354
def replace_permissions_displayname(token, holder):
355
    if holder == '*':
356
        return holder
357
    try:
358
        # check first for a group permission
359
        account, group = holder.split(':', 1)
360
    except ValueError:
361
        return retrieve_uuid(token, holder)
362
    else:
363
        return ':'.join([retrieve_uuid(token, account), group])
364

    
365

    
366
def replace_permissions_uuid(token, holder):
367
    if holder == '*':
368
        return holder
369
    try:
370
        # check first for a group permission
371
        account, group = holder.split(':', 1)
372
    except ValueError:
373
        return retrieve_displayname(token, holder)
374
    else:
375
        return ':'.join([retrieve_displayname(token, account), group])
376

    
377

    
378
def update_sharing_meta(request, permissions, v_account,
379
                        v_container, v_object, meta):
380
    if permissions is None:
381
        return
382
    allowed, perm_path, perms = permissions
383
    if len(perms) == 0:
384
        return
385

    
386
    # replace uuid with displayname
387
    if TRANSLATE_UUIDS:
388
        perms['read'] = [replace_permissions_uuid(
389
            getattr(request, 'token', None), x)
390
            for x in perms.get('read', [])]
391
        perms['write'] = [replace_permissions_uuid(
392
            getattr(request, 'token', None), x)
393
            for x in perms.get('write', [])]
394

    
395
    ret = []
396

    
397
    r = ','.join(perms.get('read', []))
398
    if r:
399
        ret.append('read=' + r)
400
    w = ','.join(perms.get('write', []))
401
    if w:
402
        ret.append('write=' + w)
403
    meta['X-Object-Sharing'] = '; '.join(ret)
404
    if '/'.join((v_account, v_container, v_object)) != perm_path:
405
        meta['X-Object-Shared-By'] = perm_path
406
    if request.user_uniq != v_account:
407
        meta['X-Object-Allowed-To'] = allowed
408

    
409

    
410
def update_public_meta(public, meta):
411
    if not public:
412
        return
413
    meta['X-Object-Public'] = join_urls(
414
        BASE_HOST, reverse('pithos.api.public.public_demux', args=(public,)))
415

    
416

    
417
def validate_modification_preconditions(request, meta):
418
    """Check the modified timestamp conforms with the preconditions set."""
419

    
420
    if 'modified' not in meta:
421
        return  # TODO: Always return?
422

    
423
    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
424
    if if_modified_since is not None:
425
        if_modified_since = parse_http_date_safe(if_modified_since)
426
    if (if_modified_since is not None
427
            and int(meta['modified']) <= if_modified_since):
428
        raise faults.NotModified('Resource has not been modified')
429

    
430
    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
431
    if if_unmodified_since is not None:
432
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
433
    if (if_unmodified_since is not None
434
            and int(meta['modified']) > if_unmodified_since):
435
        raise faults.PreconditionFailed('Resource has been modified')
436

    
437

    
438
def validate_matching_preconditions(request, meta):
439
    """Check that the ETag conforms with the preconditions set."""
440

    
441
    etag = meta.get('hash') if not UPDATE_MD5 else meta.get('checksum')
442

    
443
    if_match = request.META.get('HTTP_IF_MATCH')
444
    if if_match is not None:
445
        if etag is None:
446
            raise faults.PreconditionFailed('Resource does not exist')
447
        if (if_match != '*'
448
                and etag not in [x.lower() for x in parse_etags(if_match)]):
449
            raise faults.PreconditionFailed('Resource ETag does not match')
450

    
451
    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
452
    if if_none_match is not None:
453
        # TODO: If this passes, must ignore If-Modified-Since header.
454
        if etag is not None:
455
            if (if_none_match == '*' or etag in [x.lower() for x in
456
                                                 parse_etags(if_none_match)]):
457
                # TODO: Continue if an If-Modified-Since header is present.
458
                if request.method in ('HEAD', 'GET'):
459
                    raise faults.NotModified('Resource ETag matches')
460
                raise faults.PreconditionFailed(
461
                    'Resource exists or ETag matches')
462

    
463

    
464
def split_container_object_string(s):
465
    if not len(s) > 0 or s[0] != '/':
466
        raise ValueError
467
    s = s[1:]
468
    pos = s.find('/')
469
    if pos == -1 or pos == len(s) - 1:
470
        raise ValueError
471
    return s[:pos], s[(pos + 1):]
472

    
473

    
474
def copy_or_move_object(request, src_account, src_container, src_name,
475
                        dest_account, dest_container, dest_name,
476
                        move=False, delimiter=None):
477
    """Copy or move an object."""
478

    
479
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
480
        del(request.META['CONTENT_TYPE'])
481
    content_type, meta, permissions, public = get_object_headers(request)
482
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
483
    try:
484
        if move:
485
            version_id = request.backend.move_object(
486
                request.user_uniq, src_account, src_container, src_name,
487
                dest_account, dest_container, dest_name,
488
                content_type, 'pithos', meta, False, permissions, delimiter)
489
        else:
490
            version_id = request.backend.copy_object(
491
                request.user_uniq, src_account, src_container, src_name,
492
                dest_account, dest_container, dest_name,
493
                content_type, 'pithos', meta, False, permissions,
494
                src_version, delimiter)
495
    except NotAllowedError:
496
        raise faults.Forbidden('Not allowed')
497
    except (ItemNotExists, VersionNotExists):
498
        raise faults.ItemNotFound('Container or object does not exist')
499
    except ValueError:
500
        raise faults.BadRequest('Invalid sharing header')
501
    except QuotaError, e:
502
        raise faults.RequestEntityTooLarge('Quota error: %s' % e)
503
    if public is not None:
504
        try:
505
            request.backend.update_object_public(
506
                request.user_uniq, dest_account,
507
                dest_container, dest_name, public)
508
        except NotAllowedError:
509
            raise faults.Forbidden('Not allowed')
510
        except ItemNotExists:
511
            raise faults.ItemNotFound('Object does not exist')
512
    return version_id
513

    
514

    
515
def get_int_parameter(p):
516
    if p is not None:
517
        try:
518
            p = int(p)
519
        except ValueError:
520
            return None
521
        if p < 0:
522
            return None
523
    return p
524

    
525

    
526
def get_content_length(request):
527
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
528
    if content_length is None:
529
        raise faults.LengthRequired('Missing or invalid Content-Length header')
530
    return content_length
531

    
532

    
533
def get_range(request, size):
534
    """Parse a Range header from the request.
535

536
    Either returns None, when the header is not existent or should be ignored,
537
    or a list of (offset, length) tuples - should be further checked.
538
    """
539

    
540
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
541
    if not ranges.startswith('bytes='):
542
        return None
543

    
544
    ret = []
545
    for r in (x.strip() for x in ranges[6:].split(',')):
546
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
547
        m = p.match(r)
548
        if not m:
549
            return None
550
        offset = m.group('offset')
551
        upto = m.group('upto')
552
        if offset == '' and upto == '':
553
            return None
554

    
555
        if offset != '':
556
            offset = int(offset)
557
            if upto != '':
558
                upto = int(upto)
559
                if offset > upto:
560
                    return None
561
                ret.append((offset, upto - offset + 1))
562
            else:
563
                ret.append((offset, size - offset))
564
        else:
565
            length = int(upto)
566
            ret.append((size - length, length))
567

    
568
    return ret
569

    
570

    
571
def get_content_range(request):
572
    """Parse a Content-Range header from the request.
573

574
    Either returns None, when the header is not existent or should be ignored,
575
    or an (offset, length, total) tuple - check as length, total may be None.
576
    Returns (None, None, None) if the provided range is '*/*'.
577
    """
578

    
579
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
580
    if not ranges:
581
        return None
582

    
583
    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
584
    m = p.match(ranges)
585
    if not m:
586
        if ranges == 'bytes */*':
587
            return (None, None, None)
588
        return None
589
    offset = int(m.group('offset'))
590
    upto = m.group('upto')
591
    total = m.group('total')
592
    if upto != '':
593
        upto = int(upto)
594
    else:
595
        upto = None
596
    if total != '*':
597
        total = int(total)
598
    else:
599
        total = None
600
    if (upto is not None and offset > upto) or \
601
        (total is not None and offset >= total) or \
602
            (total is not None and upto is not None and upto >= total):
603
        return None
604

    
605
    if upto is None:
606
        length = None
607
    else:
608
        length = upto - offset + 1
609
    return (offset, length, total)
610

    
611

    
612
def get_sharing(request):
613
    """Parse an X-Object-Sharing header from the request.
614

615
    Raises BadRequest on error.
616
    """
617

    
618
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
619
    if permissions is None:
620
        return None
621

    
622
    # TODO: Document or remove '~' replacing.
623
    permissions = permissions.replace('~', '')
624

    
625
    ret = {}
626
    permissions = permissions.replace(' ', '')
627
    if permissions == '':
628
        return ret
629
    for perm in (x for x in permissions.split(';')):
630
        if perm.startswith('read='):
631
            ret['read'] = list(set(
632
                [v.replace(' ', '').lower() for v in perm[5:].split(',')]))
633
            if '' in ret['read']:
634
                ret['read'].remove('')
635
            if '*' in ret['read']:
636
                ret['read'] = ['*']
637
            if len(ret['read']) == 0:
638
                raise faults.BadRequest(
639
                    'Bad X-Object-Sharing header value: invalid length')
640
        elif perm.startswith('write='):
641
            ret['write'] = list(set(
642
                [v.replace(' ', '').lower() for v in perm[6:].split(',')]))
643
            if '' in ret['write']:
644
                ret['write'].remove('')
645
            if '*' in ret['write']:
646
                ret['write'] = ['*']
647
            if len(ret['write']) == 0:
648
                raise faults.BadRequest(
649
                    'Bad X-Object-Sharing header value: invalid length')
650
        else:
651
            raise faults.BadRequest(
652
                'Bad X-Object-Sharing header value: missing prefix')
653

    
654
    # replace displayname with uuid
655
    if TRANSLATE_UUIDS:
656
        try:
657
            ret['read'] = [replace_permissions_displayname(
658
                getattr(request, 'token', None), x)
659
                for x in ret.get('read', [])]
660
            ret['write'] = [replace_permissions_displayname(
661
                getattr(request, 'token', None), x)
662
                for x in ret.get('write', [])]
663
        except ItemNotExists, e:
664
            raise faults.BadRequest(
665
                'Bad X-Object-Sharing header value: unknown account: %s' % e)
666

    
667
    # Keep duplicates only in write list.
668
    dups = [x for x in ret.get(
669
        'read', []) if x in ret.get('write', []) and x != '*']
670
    if dups:
671
        for x in dups:
672
            ret['read'].remove(x)
673
        if len(ret['read']) == 0:
674
            del(ret['read'])
675

    
676
    return ret
677

    
678

    
679
def get_public(request):
680
    """Parse an X-Object-Public header from the request.
681

682
    Raises BadRequest on error.
683
    """
684

    
685
    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
686
    if public is None:
687
        return None
688

    
689
    public = public.replace(' ', '').lower()
690
    if public == 'true':
691
        return True
692
    elif public == 'false' or public == '':
693
        return False
694
    raise faults.BadRequest('Bad X-Object-Public header value')
695

    
696

    
697
def raw_input_socket(request):
698
    """Return the socket for reading the rest of the request."""
699

    
700
    server_software = request.META.get('SERVER_SOFTWARE')
701
    if server_software and server_software.startswith('mod_python'):
702
        return request._req
703
    if 'wsgi.input' in request.environ:
704
        return request.environ['wsgi.input']
705
    raise NotImplemented('Unknown server software')
706

    
707
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB
708

    
709

    
710
def socket_read_iterator(request, length=0, blocksize=4096):
711
    """Return maximum of blocksize data read from the socket in each iteration
712

713
    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
714
    The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
715
    """
716

    
717
    sock = raw_input_socket(request)
718
    if length < 0:  # Chunked transfers
719
        # Small version (server does the dechunking).
720
        if (request.environ.get('mod_wsgi.input_chunked', None)
721
                or request.META['SERVER_SOFTWARE'].startswith('gunicorn')):
722
            while length < MAX_UPLOAD_SIZE:
723
                data = sock.read(blocksize)
724
                if data == '':
725
                    return
726
                yield data
727
            raise faults.BadRequest('Maximum size is reached')
728

    
729
        # Long version (do the dechunking).
730
        data = ''
731
        while length < MAX_UPLOAD_SIZE:
732
            # Get chunk size.
733
            if hasattr(sock, 'readline'):
734
                chunk_length = sock.readline()
735
            else:
736
                chunk_length = ''
737
                while chunk_length[-1:] != '\n':
738
                    chunk_length += sock.read(1)
739
                chunk_length.strip()
740
            pos = chunk_length.find(';')
741
            if pos >= 0:
742
                chunk_length = chunk_length[:pos]
743
            try:
744
                chunk_length = int(chunk_length, 16)
745
            except Exception:
746
                raise faults.BadRequest('Bad chunk size')
747
                                 # TODO: Change to something more appropriate.
748
            # Check if done.
749
            if chunk_length == 0:
750
                if len(data) > 0:
751
                    yield data
752
                return
753
            # Get the actual data.
754
            while chunk_length > 0:
755
                chunk = sock.read(min(chunk_length, blocksize))
756
                chunk_length -= len(chunk)
757
                if length > 0:
758
                    length += len(chunk)
759
                data += chunk
760
                if len(data) >= blocksize:
761
                    ret = data[:blocksize]
762
                    data = data[blocksize:]
763
                    yield ret
764
            sock.read(2)  # CRLF
765
        raise faults.BadRequest('Maximum size is reached')
766
    else:
767
        if length > MAX_UPLOAD_SIZE:
768
            raise faults.BadRequest('Maximum size is reached')
769
        while length > 0:
770
            data = sock.read(min(length, blocksize))
771
            if not data:
772
                raise faults.BadRequest()
773
            length -= len(data)
774
            yield data
775

    
776

    
777
class SaveToBackendHandler(FileUploadHandler):
778
    """Handle a file from an HTML form the django way."""
779

    
780
    def __init__(self, request=None):
781
        super(SaveToBackendHandler, self).__init__(request)
782
        self.backend = request.backend
783

    
784
    def put_data(self, length):
785
        if len(self.data) >= length:
786
            block = self.data[:length]
787
            self.file.hashmap.append(self.backend.put_block(block))
788
            self.checksum_compute.update(block)
789
            self.data = self.data[length:]
790

    
791
    def new_file(self, field_name, file_name, content_type,
792
                 content_length, charset=None):
793
        self.checksum_compute = NoChecksum() if not UPDATE_MD5 else Checksum()
794
        self.data = ''
795
        self.file = UploadedFile(
796
            name=file_name, content_type=content_type, charset=charset)
797
        self.file.size = 0
798
        self.file.hashmap = []
799

    
800
    def receive_data_chunk(self, raw_data, start):
801
        self.data += raw_data
802
        self.file.size += len(raw_data)
803
        self.put_data(self.request.backend.block_size)
804
        return None
805

    
806
    def file_complete(self, file_size):
807
        l = len(self.data)
808
        if l > 0:
809
            self.put_data(l)
810
        self.file.etag = self.checksum_compute.hexdigest()
811
        return self.file
812

    
813

    
814
class ObjectWrapper(object):
815
    """Return the object's data block-per-block in each iteration.
816

817
    Read from the object using the offset and length provided
818
    in each entry of the range list.
819
    """
820

    
821
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
822
        self.backend = backend
823
        self.ranges = ranges
824
        self.sizes = sizes
825
        self.hashmaps = hashmaps
826
        self.boundary = boundary
827
        self.size = sum(self.sizes)
828

    
829
        self.file_index = 0
830
        self.block_index = 0
831
        self.block_hash = -1
832
        self.block = ''
833

    
834
        self.range_index = -1
835
        self.offset, self.length = self.ranges[0]
836

    
837
    def __iter__(self):
838
        return self
839

    
840
    def part_iterator(self):
841
        if self.length > 0:
842
            # Get the file for the current offset.
843
            file_size = self.sizes[self.file_index]
844
            while self.offset >= file_size:
845
                self.offset -= file_size
846
                self.file_index += 1
847
                file_size = self.sizes[self.file_index]
848

    
849
            # Get the block for the current position.
850
            self.block_index = int(self.offset / self.backend.block_size)
851
            if self.block_hash != \
852
                    self.hashmaps[self.file_index][self.block_index]:
853
                self.block_hash = self.hashmaps[
854
                    self.file_index][self.block_index]
855
                try:
856
                    self.block = self.backend.get_block(self.block_hash)
857
                except ItemNotExists:
858
                    raise faults.ItemNotFound('Block does not exist')
859

    
860
            # Get the data from the block.
861
            bo = self.offset % self.backend.block_size
862
            bs = self.backend.block_size
863
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
864
                    self.sizes[self.file_index] % self.backend.block_size):
865
                bs = self.sizes[self.file_index] % self.backend.block_size
866
            bl = min(self.length, bs - bo)
867
            data = self.block[bo:bo + bl]
868
            self.offset += bl
869
            self.length -= bl
870
            return data
871
        else:
872
            raise StopIteration
873

    
874
    def next(self):
875
        if len(self.ranges) == 1:
876
            return self.part_iterator()
877
        if self.range_index == len(self.ranges):
878
            raise StopIteration
879
        try:
880
            if self.range_index == -1:
881
                raise StopIteration
882
            return self.part_iterator()
883
        except StopIteration:
884
            self.range_index += 1
885
            out = []
886
            if self.range_index < len(self.ranges):
887
                # Part header.
888
                self.offset, self.length = self.ranges[self.range_index]
889
                self.file_index = 0
890
                if self.range_index > 0:
891
                    out.append('')
892
                out.append('--' + self.boundary)
893
                out.append('Content-Range: bytes %d-%d/%d' % (
894
                    self.offset, self.offset + self.length - 1, self.size))
895
                out.append('Content-Transfer-Encoding: binary')
896
                out.append('')
897
                out.append('')
898
                return '\r\n'.join(out)
899
            else:
900
                # Footer.
901
                out.append('')
902
                out.append('--' + self.boundary + '--')
903
                out.append('')
904
                return '\r\n'.join(out)
905

    
906

    
907
def object_data_response(request, sizes, hashmaps, meta, public=False):
908
    """Get the HttpResponse object for replying with the object's data."""
909

    
910
    # Range handling.
911
    size = sum(sizes)
912
    ranges = get_range(request, size)
913
    if ranges is None:
914
        ranges = [(0, size)]
915
        ret = 200
916
    else:
917
        check = [True for offset, length in ranges if
918
                 length <= 0 or length > size or
919
                 offset < 0 or offset >= size or
920
                 offset + length > size]
921
        if len(check) > 0:
922
            raise faults.RangeNotSatisfiable(
923
                'Requested range exceeds object limits')
924
        ret = 206
925
        if_range = request.META.get('HTTP_IF_RANGE')
926
        if if_range:
927
            try:
928
                # Modification time has passed instead.
929
                last_modified = parse_http_date(if_range)
930
                if last_modified != meta['modified']:
931
                    ranges = [(0, size)]
932
                    ret = 200
933
            except ValueError:
934
                if if_range != meta['checksum']:
935
                    ranges = [(0, size)]
936
                    ret = 200
937

    
938
    if ret == 206 and len(ranges) > 1:
939
        boundary = uuid.uuid4().hex
940
    else:
941
        boundary = ''
942
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
943
    response = HttpResponse(wrapper, status=ret)
944
    put_object_headers(
945
        response, meta, restricted=public,
946
        token=getattr(request, 'token', None))
947
    if ret == 206:
948
        if len(ranges) == 1:
949
            offset, length = ranges[0]
950
            response[
951
                'Content-Length'] = length  # Update with the correct length.
952
            response['Content-Range'] = 'bytes %d-%d/%d' % (
953
                offset, offset + length - 1, size)
954
        else:
955
            del(response['Content-Length'])
956
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
957
                boundary,)
958
    return response
959

    
960

    
961
def put_object_block(request, hashmap, data, offset):
962
    """Put one block of data at the given offset."""
963

    
964
    bi = int(offset / request.backend.block_size)
965
    bo = offset % request.backend.block_size
966
    bl = min(len(data), request.backend.block_size - bo)
967
    if bi < len(hashmap):
968
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
969
    else:
970
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
971
    return bl  # Return ammount of data written.
972

    
973

    
974
def hashmap_md5(backend, hashmap, size):
975
    """Produce the MD5 sum from the data in the hashmap."""
976

    
977
    # TODO: Search backend for the MD5 of another object
978
    #       with the same hashmap and size...
979
    md5 = hashlib.md5()
980
    bs = backend.block_size
981
    for bi, hash in enumerate(hashmap):
982
        data = backend.get_block(hash)  # Blocks come in padded.
983
        if bi == len(hashmap) - 1:
984
            data = data[:size % bs]
985
        md5.update(data)
986
    return md5.hexdigest().lower()
987

    
988

    
989
def simple_list_response(request, l):
990
    if request.serialization == 'text':
991
        return '\n'.join(l) + '\n'
992
    if request.serialization == 'xml':
993
        return render_to_string('items.xml', {'items': l})
994
    if request.serialization == 'json':
995
        return json.dumps(l)
996

    
997

    
998
from pithos.backends.util import PithosBackendPool
999

    
1000
if RADOS_STORAGE:
1001
    BLOCK_PARAMS = {'mappool': RADOS_POOL_MAPS,
1002
                    'blockpool': RADOS_POOL_BLOCKS, }
1003
else:
1004
    BLOCK_PARAMS = {'mappool': None,
1005
                    'blockpool': None, }
1006

    
1007
BACKEND_KWARGS = dict(
1008
    db_module=BACKEND_DB_MODULE,
1009
    db_connection=BACKEND_DB_CONNECTION,
1010
    block_module=BACKEND_BLOCK_MODULE,
1011
    block_path=BACKEND_BLOCK_PATH,
1012
    block_umask=BACKEND_BLOCK_UMASK,
1013
    block_size=BACKEND_BLOCK_SIZE,
1014
    hash_algorithm=BACKEND_HASH_ALGORITHM,
1015
    queue_module=BACKEND_QUEUE_MODULE,
1016
    queue_hosts=BACKEND_QUEUE_HOSTS,
1017
    queue_exchange=BACKEND_QUEUE_EXCHANGE,
1018
    astakos_auth_url=ASTAKOS_AUTH_URL,
1019
    service_token=SERVICE_TOKEN,
1020
    astakosclient_poolsize=ASTAKOSCLIENT_POOLSIZE,
1021
    free_versioning=BACKEND_FREE_VERSIONING,
1022
    block_params=BLOCK_PARAMS,
1023
    public_url_security=PUBLIC_URL_SECURITY,
1024
    public_url_alphabet=PUBLIC_URL_ALPHABET,
1025
    account_quota_policy=BACKEND_ACCOUNT_QUOTA,
1026
    container_quota_policy=BACKEND_CONTAINER_QUOTA,
1027
    container_versioning_policy=BACKEND_VERSIONING)
1028

    
1029
_pithos_backend_pool = PithosBackendPool(size=BACKEND_POOL_SIZE,
1030
                                         **BACKEND_KWARGS)
1031

    
1032

    
1033
def get_backend():
1034
    if BACKEND_POOL_ENABLED:
1035
        backend = _pithos_backend_pool.pool_get()
1036
    else:
1037
        backend = connect_backend(**BACKEND_KWARGS)
1038
    backend.serials = []
1039
    backend.messages = []
1040
    return backend
1041

    
1042

    
1043
def update_request_headers(request):
1044
    # Handle URL-encoded keys and values.
1045
    meta = dict([(
1046
        k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
1047
    for k, v in meta.iteritems():
1048
        try:
1049
            k.decode('ascii')
1050
            v.decode('ascii')
1051
        except UnicodeDecodeError:
1052
            raise faults.BadRequest('Bad character in headers.')
1053
        if '%' in k or '%' in v:
1054
            del(request.META[k])
1055
            request.META[unquote(k)] = smart_unicode(unquote(
1056
                v), strings_only=True)
1057

    
1058

    
1059
def update_response_headers(request, response):
1060
    # URL-encode unicode in headers.
1061
    meta = response.items()
1062
    for k, v in meta:
1063
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
1064
                k.startswith('X-Object-') or k.startswith('Content-')):
1065
            del(response[k])
1066
            response[quote(k)] = quote(v, safe='/=,:@; ')
1067

    
1068

    
1069
def api_method(http_method=None, token_required=True, user_required=True,
1070
               logger=None, format_allowed=False, serializations=None,
1071
               strict_serlization=False, lock_container_path=False):
1072
    serializations = serializations or ['json', 'xml']
1073

    
1074
    def decorator(func):
1075
        @api.api_method(http_method=http_method, token_required=token_required,
1076
                        user_required=user_required,
1077
                        logger=logger, format_allowed=format_allowed,
1078
                        astakos_auth_url=ASTAKOS_AUTH_URL,
1079
                        serializations=serializations,
1080
                        strict_serlization=strict_serlization)
1081
        @wraps(func)
1082
        def wrapper(request, *args, **kwargs):
1083
            # The args variable may contain up to (account, container, object).
1084
            if len(args) > 1 and len(args[1]) > 256:
1085
                raise faults.BadRequest("Container name too large")
1086
            if len(args) > 2 and len(args[2]) > 1024:
1087
                raise faults.BadRequest('Object name too large.')
1088

    
1089
            success_status = False
1090
            try:
1091
                # Add a PithosBackend as attribute of the request object
1092
                request.backend = get_backend()
1093
                request.backend.pre_exec(lock_container_path)
1094

    
1095
                # Many API method expect thet X-Auth-Token in request,token
1096
                request.token = request.x_auth_token
1097
                update_request_headers(request)
1098
                response = func(request, *args, **kwargs)
1099
                update_response_headers(request, response)
1100

    
1101
                success_status = True
1102
                return response
1103
            finally:
1104
                # Always close PithosBackend connection
1105
                if getattr(request, "backend", None) is not None:
1106
                    request.backend.post_exec(success_status)
1107
                    request.backend.close()
1108
        return wrapper
1109
    return decorator
1110

    
1111

    
1112
def restrict_to_host(host=None):
1113
    """
1114
    View decorator which restricts wrapped view to be accessed only under the
1115
    host set. If an invalid host is identified and request HTTP method is one
1116
    of ``GET``, ``HOST``, the decorator will return a redirect response using a
1117
    clone of the request with host replaced to the one the restriction applies
1118
    to.
1119

1120
    e.g.
1121
    @restrict_to_host('files.example.com')
1122
    my_restricted_view(request, path):
1123
        return HttpResponse(file(path).read())
1124

1125
    A get to ``https://api.example.com/my_restricted_view/file_path/?param=1``
1126
    will return a redirect response with Location header set to
1127
    ``https://files.example.com/my_restricted_view/file_path/?param=1``.
1128

1129
    If host is set to ``None`` no restriction will be applied.
1130
    """
1131
    def decorator(func):
1132
        # skip decoration if no host is set
1133
        if not host:
1134
            return func
1135

    
1136
        @wraps(func)
1137
        def wrapper(request, *args, **kwargs):
1138
            request_host = request.get_host()
1139
            if host != request_host:
1140
                proto = 'https' if request.is_secure() else 'http'
1141
                if request.method in ['GET', 'HEAD']:
1142
                    full_path = request.get_full_path()
1143
                    redirect_uri = "%s://%s%s" % (proto, host, full_path)
1144
                    return HttpResponseRedirect(redirect_uri)
1145
                else:
1146
                    raise PermissionDenied
1147
            return func(request, *args, **kwargs)
1148
        return wrapper
1149
    return decorator
1150

    
1151

    
1152
def view_method():
1153
    """Decorator function for views."""
1154

    
1155
    def decorator(func):
1156
        @restrict_to_host(UNSAFE_DOMAIN)
1157
        @wraps(func)
1158
        def wrapper(request, *args, **kwargs):
1159
            if request.method not in ['GET', 'HEAD']:
1160
                return HttpResponseNotAllowed(['GET', 'HEAD'])
1161

    
1162
            try:
1163
                access_token = request.GET.get('access_token')
1164
                requested_resource = text.uenc(request.path.split(VIEW_PREFIX,
1165
                                                                  2)[-1])
1166
                astakos = AstakosClient(SERVICE_TOKEN, ASTAKOS_AUTH_URL,
1167
                                        retry=2, use_pool=True,
1168
                                        logger=logger)
1169
                if access_token is not None:
1170
                    # authenticate using the short-term access token
1171
                    try:
1172
                        request.user = astakos.validate_token(
1173
                            access_token, requested_resource)
1174
                    except AstakosClientException:
1175
                        return HttpResponseRedirect(request.path)
1176
                    request.user_uniq = request.user["access"]["user"]["id"]
1177

    
1178
                    _func = api_method(token_required=False,
1179
                                       user_required=False)(func)
1180
                    response = _func(request, *args, **kwargs)
1181
                    if response.status_code == 404:
1182
                        raise Http404
1183
                    elif response.status_code == 403:
1184
                        raise PermissionDenied
1185
                    return response
1186

    
1187
                client_id, client_secret = OAUTH2_CLIENT_CREDENTIALS
1188
                # TODO: check if client credentials are not set
1189
                authorization_code = request.GET.get('code')
1190
                if authorization_code is None:
1191
                    # request authorization code
1192
                    params = {'response_type': 'code',
1193
                              'client_id': client_id,
1194
                              'redirect_uri':
1195
                              request.build_absolute_uri(request.path),
1196
                              'state': '',  # TODO include state for security
1197
                              'scope': requested_resource}
1198
                    return HttpResponseRedirect('%s?%s' %
1199
                                                (join_urls(astakos.oauth2_url,
1200
                                                           'auth'),
1201
                                                 urlencode(params)))
1202
                else:
1203
                    # request short-term access token
1204
                    redirect_uri = request.build_absolute_uri(request.path)
1205
                    data = astakos.get_token('authorization_code',
1206
                                             *OAUTH2_CLIENT_CREDENTIALS,
1207
                                             redirect_uri=redirect_uri,
1208
                                             scope=requested_resource,
1209
                                             code=authorization_code)
1210
                    params = {'access_token': data.get('access_token', '')}
1211
                    return HttpResponseRedirect('%s?%s' % (redirect_uri,
1212
                                                           urlencode(params)))
1213
            except AstakosClientException, err:
1214
                logger.exception(err)
1215
                raise PermissionDenied
1216
        return wrapper
1217
    return decorator
1218

    
1219

    
1220
class Checksum:
1221
    def __init__(self):
1222
        self.md5 = hashlib.md5()
1223

    
1224
    def update(self, data):
1225
        self.md5.update(data)
1226

    
1227
    def hexdigest(self):
1228
        return self.md5.hexdigest().lower()
1229

    
1230

    
1231
class NoChecksum:
1232
    def update(self, data):
1233
        pass
1234

    
1235
    def hexdigest(self):
1236
        return ''