Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ 4d366a03

History | View | Annotate | Download (39.3 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52
from synnefo.lib.astakos import get_user
53

    
54
from pithos.api.faults import (
55
    Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
56
    Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
57
    RangeNotSatisfiable, InternalServerError, NotImplemented)
58
from pithos.api.short_url import encode_url
59
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
60
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
61
                                 BACKEND_BLOCK_UMASK,
62
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
63
                                 BACKEND_QUEUE_EXCHANGE,
64
                                 QUOTAHOLDER_URL, QUOTAHOLDER_TOKEN,
65
                                 BACKEND_QUOTA, BACKEND_VERSIONING,
66
                                 BACKEND_FREE_VERSIONING,
67
                                 AUTHENTICATION_URL, AUTHENTICATION_USERS,
68
                                 SERVICE_TOKEN, COOKIE_NAME, USER_INFO_URL)
69
from pithos.backends import connect_backend
70
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
71
                                  VersionNotExists)
72
from synnefo.lib.astakos import get_user_uuid, get_username
73

    
74
import logging
75
import re
76
import hashlib
77
import uuid
78
import decimal
79

    
80

    
81
logger = logging.getLogger(__name__)
82

    
83

    
84
class UTC(tzinfo):
85
    def utcoffset(self, dt):
86
        return timedelta(0)
87

    
88
    def tzname(self, dt):
89
        return 'UTC'
90

    
91
    def dst(self, dt):
92
        return timedelta(0)
93

    
94

    
95
def json_encode_decimal(obj):
96
    if isinstance(obj, decimal.Decimal):
97
        return str(obj)
98
    raise TypeError(repr(obj) + " is not JSON serializable")
99

    
100

    
101
def isoformat(d):
102
    """Return an ISO8601 date string that includes a timezone."""
103

    
104
    return d.replace(tzinfo=UTC()).isoformat()
105

    
106

    
107
def rename_meta_key(d, old, new):
108
    if old not in d:
109
        return
110
    d[new] = d[old]
111
    del(d[old])
112

    
113

    
114
def printable_header_dict(d):
115
    """Format a meta dictionary for printing out json/xml.
116

117
    Convert all keys to lower case and replace dashes with underscores.
118
    Format 'last_modified' timestamp.
119
    """
120

    
121
    if 'last_modified' in d and d['last_modified']:
122
        d['last_modified'] = isoformat(
123
            datetime.fromtimestamp(d['last_modified']))
124
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
125

    
126

    
127
def format_header_key(k):
128
    """Convert underscores to dashes and capitalize intra-dash strings."""
129
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
130

    
131

    
132
def get_header_prefix(request, prefix):
133
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""
134

    
135
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
136
    # TODO: Document or remove '~' replacing.
137
    return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])
138

    
139

    
140
def check_meta_headers(meta):
141
    if len(meta) > 90:
142
        raise BadRequest('Too many headers.')
143
    for k, v in meta.iteritems():
144
        if len(k) > 128:
145
            raise BadRequest('Header name too large.')
146
        if len(v) > 256:
147
            raise BadRequest('Header value too large.')
148

    
149

    
150
def get_account_headers(request):
151
    meta = get_header_prefix(request, 'X-Account-Meta-')
152
    check_meta_headers(meta)
153
    groups = {}
154
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
155
        n = k[16:].lower()
156
        if '-' in n or '_' in n:
157
            raise BadRequest('Bad characters in group name')
158
        groups[n] = v.replace(' ', '').split(',')
159
        while '' in groups[n]:
160
            groups[n].remove('')
161
    return meta, groups
162

    
163

    
164
def put_account_translation_headers(response, accounts):
165
    for x in accounts:
166
        k = smart_str('X-Account-Presentation-%s' % x, strings_only=True)
167
        v = smart_str(retrieve_username(x), strings_only=True)
168
        response[k] = v
169

    
170

    
171
def put_account_headers(response, meta, groups, policy):
172
    if 'count' in meta:
173
        response['X-Account-Container-Count'] = meta['count']
174
    if 'bytes' in meta:
175
        response['X-Account-Bytes-Used'] = meta['bytes']
176
    response['Last-Modified'] = http_date(int(meta['modified']))
177
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
178
        response[smart_str(
179
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
180
    if 'until_timestamp' in meta:
181
        response['X-Account-Until-Timestamp'] = http_date(
182
            int(meta['until_timestamp']))
183
    for k, v in groups.iteritems():
184
        k = smart_str(k, strings_only=True)
185
        k = format_header_key('X-Account-Group-' + k)
186
        v = smart_str(','.join(v), strings_only=True)
187
        response[k] = v
188
    for k, v in policy.iteritems():
189
        response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
190

    
191

    
192
def get_container_headers(request):
193
    meta = get_header_prefix(request, 'X-Container-Meta-')
194
    check_meta_headers(meta)
195
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
196
    return meta, policy
197

    
198

    
199
def put_container_headers(request, response, meta, policy):
200
    if 'count' in meta:
201
        response['X-Container-Object-Count'] = meta['count']
202
    if 'bytes' in meta:
203
        response['X-Container-Bytes-Used'] = meta['bytes']
204
    response['Last-Modified'] = http_date(int(meta['modified']))
205
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
206
        response[smart_str(
207
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
208
    l = [smart_str(x, strings_only=True) for x in meta['object_meta']
209
         if x.startswith('X-Object-Meta-')]
210
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
211
    response['X-Container-Block-Size'] = request.backend.block_size
212
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
213
    if 'until_timestamp' in meta:
214
        response['X-Container-Until-Timestamp'] = http_date(
215
            int(meta['until_timestamp']))
216
    for k, v in policy.iteritems():
217
        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
218

    
219

    
220
def get_object_headers(request):
221
    content_type = request.META.get('CONTENT_TYPE', None)
222
    meta = get_header_prefix(request, 'X-Object-Meta-')
223
    check_meta_headers(meta)
224
    if request.META.get('HTTP_CONTENT_ENCODING'):
225
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
226
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
227
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
228
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
229
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
230
    return content_type, meta, get_sharing(request), get_public(request)
231

    
232

    
233
def put_object_headers(response, meta, restricted=False):
234
    response['ETag'] = meta['checksum']
235
    response['Content-Length'] = meta['bytes']
236
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
237
    response['Last-Modified'] = http_date(int(meta['modified']))
238
    if not restricted:
239
        response['X-Object-Hash'] = meta['hash']
240
        response['X-Object-UUID'] = meta['uuid']
241
        modified_by = retrieve_username(meta['modified_by'])
242
        response['X-Object-Modified-By'] = smart_str(
243
            modified_by, strings_only=True)
244
        response['X-Object-Version'] = meta['version']
245
        response['X-Object-Version-Timestamp'] = http_date(
246
            int(meta['version_timestamp']))
247
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
248
            response[smart_str(
249
                k, strings_only=True)] = smart_str(meta[k], strings_only=True)
250
        for k in (
251
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
252
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
253
                'X-Object-Public'):
254
            if k in meta:
255
                response[k] = smart_str(meta[k], strings_only=True)
256
    else:
257
        for k in ('Content-Encoding', 'Content-Disposition'):
258
            if k in meta:
259
                response[k] = smart_str(meta[k], strings_only=True)
260

    
261

    
262
def update_manifest_meta(request, v_account, meta):
263
    """Update metadata if the object has an X-Object-Manifest."""
264

    
265
    if 'X-Object-Manifest' in meta:
266
        etag = ''
267
        bytes = 0
268
        try:
269
            src_container, src_name = split_container_object_string(
270
                '/' + meta['X-Object-Manifest'])
271
            objects = request.backend.list_objects(
272
                request.user_uniq, v_account,
273
                src_container, prefix=src_name, virtual=False)
274
            for x in objects:
275
                src_meta = request.backend.get_object_meta(request.user_uniq,
276
                                                           v_account, src_container, x[0], 'pithos', x[1])
277
                etag += src_meta['checksum']
278
                bytes += src_meta['bytes']
279
        except:
280
            # Ignore errors.
281
            return
282
        meta['bytes'] = bytes
283
        md5 = hashlib.md5()
284
        md5.update(etag)
285
        meta['checksum'] = md5.hexdigest().lower()
286

    
287
def is_uuid(str):
288
    try:
289
        uuid.UUID(str)
290
    except ValueError:
291
       return False
292
    else:
293
       return True
294

    
295
def retrieve_username(uuid):
296
    try:
297
        return get_username(
298
            SERVICE_TOKEN, uuid, USER_INFO_URL, AUTHENTICATION_USERS)
299
    except:
300
        # if it fails just leave the metadata intact
301
        return uuid
302

    
303
def retrieve_uuid(username):
304
    if is_uuid(username):
305
            return username
306

    
307
    try:
308
        return get_user_uuid(
309
            SERVICE_TOKEN, username, USER_INFO_URL, AUTHENTICATION_USERS)
310
    except Exception, e:
311
        if e.args:
312
            status = e.args[-1]
313
            if status == 404:
314
                raise ItemNotExists(username)
315
        raise
316

    
317
def replace_permissions_username(holder):
318
    try:
319
        # check first for a group permission
320
        account, group = holder.split(':')
321
    except ValueError:
322
        return retrieve_uuid(holder)
323
    else:
324
        return ':'.join([retrieve_uuid(account), group])
325

    
326
def replace_permissions_uuid(holder):
327
    try:
328
        # check first for a group permission
329
        account, group = holder.split(':')
330
    except ValueError:
331
        return retrieve_username(holder)
332
    else:
333
        return ':'.join([retrieve_username(account), group])
334

    
335
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
336
    if permissions is None:
337
        return
338
    allowed, perm_path, perms = permissions
339
    if len(perms) == 0:
340
        return
341

    
342
    perms['read'] = [replace_permissions_uuid(x) for x in perms.get('read', [])]
343
    perms['write'] = \
344
        [replace_permissions_uuid(x) for x in perms.get('write', [])]
345

    
346
    ret = []
347

    
348
    r = ','.join(perms.get('read', []))
349
    if r:
350
        ret.append('read=' + r)
351
    w = ','.join(perms.get('write', []))
352
    if w:
353
        ret.append('write=' + w)
354
    meta['X-Object-Sharing'] = '; '.join(ret)
355
    if '/'.join((v_account, v_container, v_object)) != perm_path:
356
        meta['X-Object-Shared-By'] = perm_path
357
    if request.user_uniq != v_account:
358
        meta['X-Object-Allowed-To'] = allowed
359

    
360

    
361
def update_public_meta(public, meta):
362
    if not public:
363
        return
364
    meta['X-Object-Public'] = '/public/' + encode_url(public)
365

    
366

    
367
def validate_modification_preconditions(request, meta):
368
    """Check that the modified timestamp conforms with the preconditions set."""
369

    
370
    if 'modified' not in meta:
371
        return  # TODO: Always return?
372

    
373
    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
374
    if if_modified_since is not None:
375
        if_modified_since = parse_http_date_safe(if_modified_since)
376
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
377
        raise NotModified('Resource has not been modified')
378

    
379
    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
380
    if if_unmodified_since is not None:
381
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
382
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
383
        raise PreconditionFailed('Resource has been modified')
384

    
385

    
386
def validate_matching_preconditions(request, meta):
387
    """Check that the ETag conforms with the preconditions set."""
388

    
389
    etag = meta['checksum']
390
    if not etag:
391
        etag = None
392

    
393
    if_match = request.META.get('HTTP_IF_MATCH')
394
    if if_match is not None:
395
        if etag is None:
396
            raise PreconditionFailed('Resource does not exist')
397
        if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
398
            raise PreconditionFailed('Resource ETag does not match')
399

    
400
    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
401
    if if_none_match is not None:
402
        # TODO: If this passes, must ignore If-Modified-Since header.
403
        if etag is not None:
404
            if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
405
                # TODO: Continue if an If-Modified-Since header is present.
406
                if request.method in ('HEAD', 'GET'):
407
                    raise NotModified('Resource ETag matches')
408
                raise PreconditionFailed('Resource exists or ETag matches')
409

    
410

    
411
def split_container_object_string(s):
412
    if not len(s) > 0 or s[0] != '/':
413
        raise ValueError
414
    s = s[1:]
415
    pos = s.find('/')
416
    if pos == -1 or pos == len(s) - 1:
417
        raise ValueError
418
    return s[:pos], s[(pos + 1):]
419

    
420

    
421
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False, delimiter=None):
422
    """Copy or move an object."""
423

    
424
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
425
        del(request.META['CONTENT_TYPE'])
426
    content_type, meta, permissions, public = get_object_headers(request)
427
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
428
    try:
429
        if move:
430
            version_id = request.backend.move_object(
431
                request.user_uniq, src_account, src_container, src_name,
432
                dest_account, dest_container, dest_name,
433
                content_type, 'pithos', meta, False, permissions, delimiter)
434
        else:
435
            version_id = request.backend.copy_object(
436
                request.user_uniq, src_account, src_container, src_name,
437
                dest_account, dest_container, dest_name,
438
                content_type, 'pithos', meta, False, permissions, src_version, delimiter)
439
    except NotAllowedError:
440
        raise Forbidden('Not allowed')
441
    except (ItemNotExists, VersionNotExists):
442
        raise ItemNotFound('Container or object does not exist')
443
    except ValueError:
444
        raise BadRequest('Invalid sharing header')
445
    except QuotaError:
446
        raise RequestEntityTooLarge('Quota exceeded')
447
    if public is not None:
448
        try:
449
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
450
        except NotAllowedError:
451
            raise Forbidden('Not allowed')
452
        except ItemNotExists:
453
            raise ItemNotFound('Object does not exist')
454
    return version_id
455

    
456

    
457
def get_int_parameter(p):
458
    if p is not None:
459
        try:
460
            p = int(p)
461
        except ValueError:
462
            return None
463
        if p < 0:
464
            return None
465
    return p
466

    
467

    
468
def get_content_length(request):
469
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
470
    if content_length is None:
471
        raise LengthRequired('Missing or invalid Content-Length header')
472
    return content_length
473

    
474

    
475
def get_range(request, size):
476
    """Parse a Range header from the request.
477

478
    Either returns None, when the header is not existent or should be ignored,
479
    or a list of (offset, length) tuples - should be further checked.
480
    """
481

    
482
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
483
    if not ranges.startswith('bytes='):
484
        return None
485

    
486
    ret = []
487
    for r in (x.strip() for x in ranges[6:].split(',')):
488
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
489
        m = p.match(r)
490
        if not m:
491
            return None
492
        offset = m.group('offset')
493
        upto = m.group('upto')
494
        if offset == '' and upto == '':
495
            return None
496

    
497
        if offset != '':
498
            offset = int(offset)
499
            if upto != '':
500
                upto = int(upto)
501
                if offset > upto:
502
                    return None
503
                ret.append((offset, upto - offset + 1))
504
            else:
505
                ret.append((offset, size - offset))
506
        else:
507
            length = int(upto)
508
            ret.append((size - length, length))
509

    
510
    return ret
511

    
512

    
513
def get_content_range(request):
514
    """Parse a Content-Range header from the request.
515

516
    Either returns None, when the header is not existent or should be ignored,
517
    or an (offset, length, total) tuple - check as length, total may be None.
518
    Returns (None, None, None) if the provided range is '*/*'.
519
    """
520

    
521
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
522
    if not ranges:
523
        return None
524

    
525
    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
526
    m = p.match(ranges)
527
    if not m:
528
        if ranges == 'bytes */*':
529
            return (None, None, None)
530
        return None
531
    offset = int(m.group('offset'))
532
    upto = m.group('upto')
533
    total = m.group('total')
534
    if upto != '':
535
        upto = int(upto)
536
    else:
537
        upto = None
538
    if total != '*':
539
        total = int(total)
540
    else:
541
        total = None
542
    if (upto is not None and offset > upto) or \
543
        (total is not None and offset >= total) or \
544
            (total is not None and upto is not None and upto >= total):
545
        return None
546

    
547
    if upto is None:
548
        length = None
549
    else:
550
        length = upto - offset + 1
551
    return (offset, length, total)
552

    
553

    
554
def get_sharing(request):
555
    """Parse an X-Object-Sharing header from the request.
556

557
    Raises BadRequest on error.
558
    """
559

    
560
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
561
    if permissions is None:
562
        return None
563

    
564
    # TODO: Document or remove '~' replacing.
565
    permissions = permissions.replace('~', '')
566

    
567
    ret = {}
568
    permissions = permissions.replace(' ', '')
569
    if permissions == '':
570
        return ret
571
    for perm in (x for x in permissions.split(';')):
572
        if perm.startswith('read='):
573
            ret['read'] = list(set(
574
                [v.replace(' ', '').lower() for v in perm[5:].split(',')]))
575
            if '' in ret['read']:
576
                ret['read'].remove('')
577
            if '*' in ret['read']:
578
                ret['read'] = ['*']
579
            if len(ret['read']) == 0:
580
                raise BadRequest(
581
                    'Bad X-Object-Sharing header value: invalid length')
582
        elif perm.startswith('write='):
583
            ret['write'] = list(set(
584
                [v.replace(' ', '').lower() for v in perm[6:].split(',')]))
585
            if '' in ret['write']:
586
                ret['write'].remove('')
587
            if '*' in ret['write']:
588
                ret['write'] = ['*']
589
            if len(ret['write']) == 0:
590
                raise BadRequest(
591
                    'Bad X-Object-Sharing header value: invalid length')
592
        else:
593
            raise BadRequest(
594
                'Bad X-Object-Sharing header value: missing prefix')
595

    
596
    # replace username with uuid
597
    try:
598
        ret['read'] = \
599
            [replace_permissions_username(x) for x in ret.get('read', [])]
600
        ret['write'] = \
601
            [replace_permissions_username(x) for x in ret.get('write', [])]
602
    except ItemNotExists, e:
603
        raise BadRequest(
604
            'Bad X-Object-Sharing header value: unknown account: %s' % e)
605

    
606
    # Keep duplicates only in write list.
607
    dups = [x for x in ret.get(
608
        'read', []) if x in ret.get('write', []) and x != '*']
609
    if dups:
610
        for x in dups:
611
            ret['read'].remove(x)
612
        if len(ret['read']) == 0:
613
            del(ret['read'])
614

    
615
    return ret
616

    
617

    
618
def get_public(request):
619
    """Parse an X-Object-Public header from the request.
620

621
    Raises BadRequest on error.
622
    """
623

    
624
    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
625
    if public is None:
626
        return None
627

    
628
    public = public.replace(' ', '').lower()
629
    if public == 'true':
630
        return True
631
    elif public == 'false' or public == '':
632
        return False
633
    raise BadRequest('Bad X-Object-Public header value')
634

    
635

    
636
def raw_input_socket(request):
637
    """Return the socket for reading the rest of the request."""
638

    
639
    server_software = request.META.get('SERVER_SOFTWARE')
640
    if server_software and server_software.startswith('mod_python'):
641
        return request._req
642
    if 'wsgi.input' in request.environ:
643
        return request.environ['wsgi.input']
644
    raise NotImplemented('Unknown server software')
645

    
646
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB
647

    
648

    
649
def socket_read_iterator(request, length=0, blocksize=4096):
650
    """Return a maximum of blocksize data read from the socket in each iteration.
651

652
    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
653
    The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
654
    """
655

    
656
    sock = raw_input_socket(request)
657
    if length < 0:  # Chunked transfers
658
        # Small version (server does the dechunking).
659
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
660
            while length < MAX_UPLOAD_SIZE:
661
                data = sock.read(blocksize)
662
                if data == '':
663
                    return
664
                yield data
665
            raise BadRequest('Maximum size is reached')
666

    
667
        # Long version (do the dechunking).
668
        data = ''
669
        while length < MAX_UPLOAD_SIZE:
670
            # Get chunk size.
671
            if hasattr(sock, 'readline'):
672
                chunk_length = sock.readline()
673
            else:
674
                chunk_length = ''
675
                while chunk_length[-1:] != '\n':
676
                    chunk_length += sock.read(1)
677
                chunk_length.strip()
678
            pos = chunk_length.find(';')
679
            if pos >= 0:
680
                chunk_length = chunk_length[:pos]
681
            try:
682
                chunk_length = int(chunk_length, 16)
683
            except Exception, e:
684
                raise BadRequest('Bad chunk size')
685
                                 # TODO: Change to something more appropriate.
686
            # Check if done.
687
            if chunk_length == 0:
688
                if len(data) > 0:
689
                    yield data
690
                return
691
            # Get the actual data.
692
            while chunk_length > 0:
693
                chunk = sock.read(min(chunk_length, blocksize))
694
                chunk_length -= len(chunk)
695
                if length > 0:
696
                    length += len(chunk)
697
                data += chunk
698
                if len(data) >= blocksize:
699
                    ret = data[:blocksize]
700
                    data = data[blocksize:]
701
                    yield ret
702
            sock.read(2)  # CRLF
703
        raise BadRequest('Maximum size is reached')
704
    else:
705
        if length > MAX_UPLOAD_SIZE:
706
            raise BadRequest('Maximum size is reached')
707
        while length > 0:
708
            data = sock.read(min(length, blocksize))
709
            if not data:
710
                raise BadRequest()
711
            length -= len(data)
712
            yield data
713

    
714

    
715
class SaveToBackendHandler(FileUploadHandler):
716
    """Handle a file from an HTML form the django way."""
717

    
718
    def __init__(self, request=None):
719
        super(SaveToBackendHandler, self).__init__(request)
720
        self.backend = request.backend
721

    
722
    def put_data(self, length):
723
        if len(self.data) >= length:
724
            block = self.data[:length]
725
            self.file.hashmap.append(self.backend.put_block(block))
726
            self.md5.update(block)
727
            self.data = self.data[length:]
728

    
729
    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
730
        self.md5 = hashlib.md5()
731
        self.data = ''
732
        self.file = UploadedFile(
733
            name=file_name, content_type=content_type, charset=charset)
734
        self.file.size = 0
735
        self.file.hashmap = []
736

    
737
    def receive_data_chunk(self, raw_data, start):
738
        self.data += raw_data
739
        self.file.size += len(raw_data)
740
        self.put_data(self.request.backend.block_size)
741
        return None
742

    
743
    def file_complete(self, file_size):
744
        l = len(self.data)
745
        if l > 0:
746
            self.put_data(l)
747
        self.file.etag = self.md5.hexdigest().lower()
748
        return self.file
749

    
750

    
751
class ObjectWrapper(object):
752
    """Return the object's data block-per-block in each iteration.
753

754
    Read from the object using the offset and length provided in each entry of the range list.
755
    """
756

    
757
    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
758
        self.backend = backend
759
        self.ranges = ranges
760
        self.sizes = sizes
761
        self.hashmaps = hashmaps
762
        self.boundary = boundary
763
        self.size = sum(self.sizes)
764

    
765
        self.file_index = 0
766
        self.block_index = 0
767
        self.block_hash = -1
768
        self.block = ''
769

    
770
        self.range_index = -1
771
        self.offset, self.length = self.ranges[0]
772

    
773
    def __iter__(self):
774
        return self
775

    
776
    def part_iterator(self):
777
        if self.length > 0:
778
            # Get the file for the current offset.
779
            file_size = self.sizes[self.file_index]
780
            while self.offset >= file_size:
781
                self.offset -= file_size
782
                self.file_index += 1
783
                file_size = self.sizes[self.file_index]
784

    
785
            # Get the block for the current position.
786
            self.block_index = int(self.offset / self.backend.block_size)
787
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
788
                self.block_hash = self.hashmaps[
789
                    self.file_index][self.block_index]
790
                try:
791
                    self.block = self.backend.get_block(self.block_hash)
792
                except ItemNotExists:
793
                    raise ItemNotFound('Block does not exist')
794

    
795
            # Get the data from the block.
796
            bo = self.offset % self.backend.block_size
797
            bs = self.backend.block_size
798
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
799
                    self.sizes[self.file_index] % self.backend.block_size):
800
                bs = self.sizes[self.file_index] % self.backend.block_size
801
            bl = min(self.length, bs - bo)
802
            data = self.block[bo:bo + bl]
803
            self.offset += bl
804
            self.length -= bl
805
            return data
806
        else:
807
            raise StopIteration
808

    
809
    def next(self):
810
        if len(self.ranges) == 1:
811
            return self.part_iterator()
812
        if self.range_index == len(self.ranges):
813
            raise StopIteration
814
        try:
815
            if self.range_index == -1:
816
                raise StopIteration
817
            return self.part_iterator()
818
        except StopIteration:
819
            self.range_index += 1
820
            out = []
821
            if self.range_index < len(self.ranges):
822
                # Part header.
823
                self.offset, self.length = self.ranges[self.range_index]
824
                self.file_index = 0
825
                if self.range_index > 0:
826
                    out.append('')
827
                out.append('--' + self.boundary)
828
                out.append('Content-Range: bytes %d-%d/%d' % (
829
                    self.offset, self.offset + self.length - 1, self.size))
830
                out.append('Content-Transfer-Encoding: binary')
831
                out.append('')
832
                out.append('')
833
                return '\r\n'.join(out)
834
            else:
835
                # Footer.
836
                out.append('')
837
                out.append('--' + self.boundary + '--')
838
                out.append('')
839
                return '\r\n'.join(out)
840

    
841

    
842
def object_data_response(request, sizes, hashmaps, meta, public=False):
843
    """Get the HttpResponse object for replying with the object's data."""
844

    
845
    # Range handling.
846
    size = sum(sizes)
847
    ranges = get_range(request, size)
848
    if ranges is None:
849
        ranges = [(0, size)]
850
        ret = 200
851
    else:
852
        check = [True for offset, length in ranges if
853
                 length <= 0 or length > size or
854
                 offset < 0 or offset >= size or
855
                 offset + length > size]
856
        if len(check) > 0:
857
            raise RangeNotSatisfiable('Requested range exceeds object limits')
858
        ret = 206
859
        if_range = request.META.get('HTTP_IF_RANGE')
860
        if if_range:
861
            try:
862
                # Modification time has passed instead.
863
                last_modified = parse_http_date(if_range)
864
                if last_modified != meta['modified']:
865
                    ranges = [(0, size)]
866
                    ret = 200
867
            except ValueError:
868
                if if_range != meta['checksum']:
869
                    ranges = [(0, size)]
870
                    ret = 200
871

    
872
    if ret == 206 and len(ranges) > 1:
873
        boundary = uuid.uuid4().hex
874
    else:
875
        boundary = ''
876
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
877
    response = HttpResponse(wrapper, status=ret)
878
    put_object_headers(response, meta, public)
879
    if ret == 206:
880
        if len(ranges) == 1:
881
            offset, length = ranges[0]
882
            response[
883
                'Content-Length'] = length  # Update with the correct length.
884
            response['Content-Range'] = 'bytes %d-%d/%d' % (
885
                offset, offset + length - 1, size)
886
        else:
887
            del(response['Content-Length'])
888
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
889
                boundary,)
890
    return response
891

    
892

    
893
def put_object_block(request, hashmap, data, offset):
894
    """Put one block of data at the given offset."""
895

    
896
    bi = int(offset / request.backend.block_size)
897
    bo = offset % request.backend.block_size
898
    bl = min(len(data), request.backend.block_size - bo)
899
    if bi < len(hashmap):
900
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
901
    else:
902
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
903
    return bl  # Return ammount of data written.
904

    
905

    
906
def hashmap_md5(backend, hashmap, size):
907
    """Produce the MD5 sum from the data in the hashmap."""
908

    
909
    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
910
    md5 = hashlib.md5()
911
    bs = backend.block_size
912
    for bi, hash in enumerate(hashmap):
913
        data = backend.get_block(hash)  # Blocks come in padded.
914
        if bi == len(hashmap) - 1:
915
            data = data[:size % bs]
916
        md5.update(data)
917
    return md5.hexdigest().lower()
918

    
919

    
920
def simple_list_response(request, l):
921
    if request.serialization == 'text':
922
        return '\n'.join(l) + '\n'
923
    if request.serialization == 'xml':
924
        return render_to_string('items.xml', {'items': l})
925
    if request.serialization == 'json':
926
        return json.dumps(l)
927

    
928

    
929
from pithos.backends.util import PithosBackendPool
930
POOL_SIZE = 5
931

    
932

    
933
_pithos_backend_pool = PithosBackendPool(size=POOL_SIZE,
934
                                         db_module=BACKEND_DB_MODULE,
935
                                         db_connection=BACKEND_DB_CONNECTION,
936
                                         block_module=BACKEND_BLOCK_MODULE,
937
                                         block_path=BACKEND_BLOCK_PATH,
938
                                         block_umask=BACKEND_BLOCK_UMASK,
939
                                         queue_module=BACKEND_QUEUE_MODULE,
940
                                         queue_hosts=BACKEND_QUEUE_HOSTS,
941
                                         queue_exchange=BACKEND_QUEUE_EXCHANGE,
942
                                         quotaholder_url=QUOTAHOLDER_URL,
943
                                         quotaholder_token=QUOTAHOLDER_TOKEN,
944
                                         free_versioning=BACKEND_FREE_VERSIONING)
945

    
946

    
947
def get_backend():
948
    backend = _pithos_backend_pool.pool_get()
949
    backend.default_policy['quota'] = BACKEND_QUOTA
950
    backend.default_policy['versioning'] = BACKEND_VERSIONING
951
    backend.messages = []
952
    return backend
953

    
954

    
955
def update_request_headers(request):
956
    # Handle URL-encoded keys and values.
957
    meta = dict([(
958
        k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
959
    for k, v in meta.iteritems():
960
        try:
961
            k.decode('ascii')
962
            v.decode('ascii')
963
        except UnicodeDecodeError:
964
            raise BadRequest('Bad character in headers.')
965
        if '%' in k or '%' in v:
966
            del(request.META[k])
967
            request.META[unquote(k)] = smart_unicode(unquote(
968
                v), strings_only=True)
969

    
970

    
971
def update_response_headers(request, response):
972
    if request.serialization == 'xml':
973
        response['Content-Type'] = 'application/xml; charset=UTF-8'
974
    elif request.serialization == 'json':
975
        response['Content-Type'] = 'application/json; charset=UTF-8'
976
    elif not response['Content-Type']:
977
        response['Content-Type'] = 'text/plain; charset=UTF-8'
978

    
979
    if (not response.has_header('Content-Length') and
980
        not (response.has_header('Content-Type') and
981
             response['Content-Type'].startswith('multipart/byteranges'))):
982
        response['Content-Length'] = len(response.content)
983

    
984
    # URL-encode unicode in headers.
985
    meta = response.items()
986
    for k, v in meta:
987
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
988
                k.startswith('X-Object-') or k.startswith('Content-')):
989
            del(response[k])
990
            response[quote(k)] = quote(v, safe='/=,:@; ')
991

    
992

    
993
def render_fault(request, fault):
994
    if isinstance(fault, InternalServerError) and settings.DEBUG:
995
        fault.details = format_exc(fault)
996

    
997
    request.serialization = 'text'
998
    data = fault.message + '\n'
999
    if fault.details:
1000
        data += '\n' + fault.details
1001
    response = HttpResponse(data, status=fault.code)
1002
    update_response_headers(request, response)
1003
    return response
1004

    
1005

    
1006
def request_serialization(request, format_allowed=False):
1007
    """Return the serialization format requested.
1008

1009
    Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
1010
    """
1011

    
1012
    if not format_allowed:
1013
        return 'text'
1014

    
1015
    format = request.GET.get('format')
1016
    if format == 'json':
1017
        return 'json'
1018
    elif format == 'xml':
1019
        return 'xml'
1020

    
1021
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
1022
        accept, sep, rest = item.strip().partition(';')
1023
        if accept == 'application/json':
1024
            return 'json'
1025
        elif accept == 'application/xml' or accept == 'text/xml':
1026
            return 'xml'
1027

    
1028
    return 'text'
1029

    
1030
class User(unicode):
1031
    pass
1032

    
1033
def get_pithos_usage(usage):
1034
    for u in usage:
1035
        if u.get('name') == 'pithos+.diskspace':
1036
            return u
1037

    
1038
def api_method(http_method=None, format_allowed=False, user_required=True,
1039
        request_usage=False):
1040
    """Decorator function for views that implement an API method."""
1041

    
1042
    def decorator(func):
1043
        @wraps(func)
1044
        def wrapper(request, *args, **kwargs):
1045
            try:
1046
                if http_method and request.method != http_method:
1047
                    raise BadRequest('Method not allowed.')
1048

    
1049
                if user_required:
1050
                    token = None
1051
                    if request.method in ('HEAD', 'GET') and COOKIE_NAME in request.COOKIES:
1052
                        cookie_value = unquote(
1053
                            request.COOKIES.get(COOKIE_NAME, ''))
1054
                        account, sep, token = cookie_value.partition('|')
1055
                    get_user(request,
1056
                             AUTHENTICATION_URL,
1057
                             AUTHENTICATION_USERS,
1058
                             token,
1059
                             user_required)
1060
                    if  getattr(request, 'user', None) is None:
1061
                        raise Unauthorized('Access denied')
1062
                    assert getattr(request, 'user_uniq', None) != None
1063
                    request.user_uniq = User(request.user_uniq)
1064
                    request.user_uniq.uuid = request.user.get('uuid')
1065
                    request.user_usage = get_pithos_usage(
1066
                        request.user.get('usage'))
1067
                
1068
                # The args variable may contain up to (account, container, object).
1069
                if len(args) > 1 and len(args[1]) > 256:
1070
                    raise BadRequest('Container name too large.')
1071
                if len(args) > 2 and len(args[2]) > 1024:
1072
                    raise BadRequest('Object name too large.')
1073

    
1074
                # Format and check headers.
1075
                update_request_headers(request)
1076

    
1077
                # Fill in custom request variables.
1078
                request.serialization = request_serialization(
1079
                    request, format_allowed)
1080
                request.backend = get_backend()
1081

    
1082
                response = func(request, *args, **kwargs)
1083
                update_response_headers(request, response)
1084
                return response
1085
            except Fault, fault:
1086
                if fault.code >= 500:
1087
                    logger.exception("API Fault")
1088
                return render_fault(request, fault)
1089
            except BaseException, e:
1090
                logger.exception('Unexpected error: %s' % e)
1091
                fault = InternalServerError('Unexpected error: %s' % e)
1092
                return render_fault(request, fault)
1093
            finally:
1094
                if getattr(request, 'backend', None) is not None:
1095
                    request.backend.close()
1096
        return wrapper
1097
    return decorator