snf-pithos-app / pithos / api / util.py @ 32454501
# Copyright 2011-2012 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
#   1. Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer.
#
#   2. Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.

from functools import wraps
from time import time
from traceback import format_exc
from wsgiref.handlers import format_date_time
from binascii import hexlify, unhexlify
from datetime import datetime, tzinfo, timedelta
from urllib import quote, unquote

from django.conf import settings
from django.http import HttpResponse
from django.template.loader import render_to_string
from django.utils import simplejson as json
from django.utils.http import http_date, parse_etags
from django.utils.encoding import smart_unicode, smart_str
from django.core.files.uploadhandler import FileUploadHandler
from django.core.files.uploadedfile import UploadedFile

from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
from synnefo.lib.astakos import get_user

from pithos.api.faults import (
    Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
    Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
    RangeNotSatisfiable, InternalServerError, NotImplemented)
from pithos.api.short_url import encode_url
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
                                 BACKEND_BLOCK_UMASK,
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
                                 BACKEND_QUEUE_EXCHANGE,
                                 QUOTAHOLDER_URL, QUOTAHOLDER_TOKEN,
                                 BACKEND_QUOTA, BACKEND_VERSIONING,
                                 BACKEND_FREE_VERSIONING,
                                 AUTHENTICATION_URL, AUTHENTICATION_USERS,
                                 SERVICE_TOKEN, COOKIE_NAME, USER_INFO_URL)
from pithos.backends import connect_backend
from pithos.backends.base import (NotAllowedError, QuotaError, ItemNotExists,
                                  VersionNotExists)
from synnefo.lib.astakos import get_user_uuid, get_username

import logging
import re
import hashlib
import uuid
import decimal


logger = logging.getLogger(__name__)


class UTC(tzinfo):
    def utcoffset(self, dt):
        return timedelta(0)

    def tzname(self, dt):
        return 'UTC'

    def dst(self, dt):
        return timedelta(0)


def json_encode_decimal(obj):
    if isinstance(obj, decimal.Decimal):
        return str(obj)
    raise TypeError(repr(obj) + " is not JSON serializable")
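
# Intended use (illustrative): pass json_encode_decimal as the 'default' hook
# of json.dumps so Decimal values serialize as strings, e.g.
#   json.dumps({'quota': decimal.Decimal('1073741824')},
#              default=json_encode_decimal)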


def isoformat(d):
    """Return an ISO8601 date string that includes a timezone."""

    return d.replace(tzinfo=UTC()).isoformat()
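
# For reference: a naive datetime such as datetime(2012, 1, 1) is rendered as
# '2012-01-01T00:00:00+00:00' (illustrative value).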


def rename_meta_key(d, old, new):
    if old not in d:
        return
    d[new] = d[old]
    del(d[old])


def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.
    """

    if 'last_modified' in d and d['last_modified']:
        d['last_modified'] = isoformat(
            datetime.fromtimestamp(d['last_modified']))
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])


def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
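
# Illustrative conversions: format_header_key('x_account_group_dev') gives
# 'X-Account-Group-Dev', while printable_header_dict() maps a header key like
# 'Last-Modified' to 'last_modified'.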


def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""

    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    # TODO: Document or remove '~' replacing.
    return dict([(format_header_key(k[5:]), v.replace('~', '')) for k, v in request.META.iteritems() if k.startswith(prefix) and len(k) > len(prefix)])


def check_meta_headers(meta):
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for k, v in meta.iteritems():
        if len(k) > 128:
            raise BadRequest('Header name too large.')
        if len(v) > 256:
            raise BadRequest('Header value too large.')


def get_account_headers(request):
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)
    groups = {}
    for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
        n = k[16:].lower()
        if '-' in n or '_' in n:
            raise BadRequest('Bad characters in group name')
        groups[n] = v.replace(' ', '').split(',')
        while '' in groups[n]:
            groups[n].remove('')
    return meta, groups
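
# Illustrative request header and the structure it yields (hypothetical
# values): 'X-Account-Group-Dev: alice@example.org,bob@example.org' becomes
# groups == {'dev': ['alice@example.org', 'bob@example.org']}.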


def put_account_headers(response, meta, groups, policy):
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
        response[smart_str(
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(
            int(meta['until_timestamp']))
    for k, v in groups.iteritems():
        k = smart_str(k, strings_only=True)
        k = format_header_key('X-Account-Group-' + k)
        v = smart_str(','.join(v), strings_only=True)
        response[k] = v
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Account-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)


def get_container_headers(request):
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)
    policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
    return meta, policy


def put_container_headers(request, response, meta, policy):
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
        response[smart_str(
            k, strings_only=True)] = smart_str(meta[k], strings_only=True)
    l = [smart_str(x, strings_only=True) for x in meta['object_meta']
         if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(
            int(meta['until_timestamp']))
    for k, v in policy.iteritems():
        response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)


def get_object_headers(request):
    content_type = request.META.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)
    if request.META.get('HTTP_CONTENT_ENCODING'):
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
    return content_type, meta, get_sharing(request), get_public(request)


def put_object_headers(response, meta, restricted=False):
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if not restricted:
        response['X-Object-Hash'] = meta['hash']
        response['X-Object-UUID'] = meta['uuid']
        response['X-Object-Modified-By'] = smart_str(
            meta['modified_by'], strings_only=True)
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = http_date(
            int(meta['version_timestamp']))
        for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
            response[smart_str(
                k, strings_only=True)] = smart_str(meta[k], strings_only=True)
        for k in (
            'Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
            'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
                'X-Object-Public'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)
    else:
        for k in ('Content-Encoding', 'Content-Disposition'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)


def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest."""

    if 'X-Object-Manifest' in meta:
        etag = ''
        bytes = 0
        try:
            src_container, src_name = split_container_object_string(
                '/' + meta['X-Object-Manifest'])
            objects = request.backend.list_objects(
                request.user_uniq, v_account,
                src_container, prefix=src_name, virtual=False)
            for x in objects:
                src_meta = request.backend.get_object_meta(request.user_uniq,
                                                           v_account, src_container, x[0], 'pithos', x[1])
                etag += src_meta['checksum']
                bytes += src_meta['bytes']
        except:
            # Ignore errors.
            return
        meta['bytes'] = bytes
        md5 = hashlib.md5()
        md5.update(etag)
        meta['checksum'] = md5.hexdigest().lower()


def retrieve_username(uuid):
    try:
        return get_username(
            SERVICE_TOKEN, uuid, USER_INFO_URL, AUTHENTICATION_USERS)
    except:
        # if it fails just leave the metadata intact
        return uuid

def retrieve_uuid(username):
    try:
        return get_user_uuid(
            SERVICE_TOKEN, username, USER_INFO_URL, AUTHENTICATION_USERS)
    except Exception, e:
        if e.args:
            status = e.args[-1]
            if status == 404:
                raise ItemNotExists(username)
        raise

def replace_permissions_username(holder):
    try:
        # check first for a group permission
        account, group = holder.split(':')
    except ValueError:
        return retrieve_uuid(holder)
    else:
        return ':'.join([retrieve_uuid(account), group])

def replace_permissions_uuid(holder):
    try:
        # check first for a group permission
        account, group = holder.split(':')
    except ValueError:
        return retrieve_username(holder)
    else:
        return ':'.join([retrieve_username(account), group])
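
# Sharing holders are either plain accounts or 'account:group' pairs; for a
# hypothetical entry like 'alice@example.org:devs', only the account part is
# translated between username and UUID by the two helpers above, while the
# group part is kept verbatim.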

def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if len(perms) == 0:
        return

    perms['read'] = [replace_permissions_uuid(x) for x in perms.get('read', [])]
    perms['write'] = \
        [replace_permissions_uuid(x) for x in perms.get('write', [])]

    ret = []

    r = ','.join(perms.get('read', []))
    if r:
        ret.append('read=' + r)
    w = ','.join(perms.get('write', []))
    if w:
        ret.append('write=' + w)
    meta['X-Object-Sharing'] = '; '.join(ret)
    if '/'.join((v_account, v_container, v_object)) != perm_path:
        meta['X-Object-Shared-By'] = perm_path
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed


def update_public_meta(public, meta):
    if not public:
        return
    meta['X-Object-Public'] = '/public/' + encode_url(public)


def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set."""

    if 'modified' not in meta:
        return  # TODO: Always return?

    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if_modified_since = parse_http_date_safe(if_modified_since)
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
        raise NotModified('Resource has not been modified')

    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
        raise PreconditionFailed('Resource has been modified')


def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set."""

    etag = meta['checksum']
    if not etag:
        etag = None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*' and etag not in [x.lower() for x in parse_etags(if_match)]:
            raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        if etag is not None:
            if if_none_match == '*' or etag in [x.lower() for x in parse_etags(if_none_match)]:
                # TODO: Continue if an If-Modified-Since header is present.
                if request.method in ('HEAD', 'GET'):
                    raise NotModified('Resource ETag matches')
                raise PreconditionFailed('Resource exists or ETag matches')


def split_container_object_string(s):
    if not len(s) > 0 or s[0] != '/':
        raise ValueError
    s = s[1:]
    pos = s.find('/')
    if pos == -1 or pos == len(s) - 1:
        raise ValueError
    return s[:pos], s[(pos + 1):]
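
# Illustrative split: '/images/summer/photo.jpg' -> ('images', 'summer/photo.jpg');
# a missing leading slash or a missing object part raises ValueError.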


def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False, delimiter=None):
    """Copy or move an object."""

    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del(request.META['CONTENT_TYPE'])
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            version_id = request.backend.move_object(
                request.user_uniq, src_account, src_container, src_name,
                dest_account, dest_container, dest_name,
                content_type, 'pithos', meta, False, permissions, delimiter)
        else:
            version_id = request.backend.copy_object(
                request.user_uniq, src_account, src_container, src_name,
                dest_account, dest_container, dest_name,
                content_type, 'pithos', meta, False, permissions, src_version, delimiter)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (ItemNotExists, VersionNotExists):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        raise BadRequest('Invalid sharing header')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')
    if public is not None:
        try:
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
        except NotAllowedError:
            raise Forbidden('Not allowed')
        except ItemNotExists:
            raise ItemNotFound('Object does not exist')
    return version_id


def get_int_parameter(p):
    if p is not None:
        try:
            p = int(p)
        except ValueError:
            return None
        if p < 0:
            return None
    return p


def get_content_length(request):
    content_length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if content_length is None:
        raise LengthRequired('Missing or invalid Content-Length header')
    return content_length


def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header does not exist or should be ignored,
    or a list of (offset, length) tuples - should be further checked.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
        m = p.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                ret.append((offset, upto - offset + 1))
            else:
                ret.append((offset, size - offset))
        else:
            length = int(upto)
            ret.append((size - length, length))

    return ret
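
# Illustrative parses, assuming a 2000-byte object:
#   'bytes=0-499'  -> [(0, 500)]
#   'bytes=1000-'  -> [(1000, 1000)]
#   'bytes=-500'   -> [(1500, 500)]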


def get_content_range(request):
    """Parse a Content-Range header from the request.

    Either returns None, when the header does not exist or should be ignored,
    or an (offset, length, total) tuple - check it, as length and total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """

    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None

    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
    m = p.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None
    offset = int(m.group('offset'))
    upto = m.group('upto')
    total = m.group('total')
    if upto != '':
        upto = int(upto)
    else:
        upto = None
    if total != '*':
        total = int(total)
    else:
        total = None
    if (upto is not None and offset > upto) or \
        (total is not None and offset >= total) or \
            (total is not None and upto is not None and upto >= total):
        return None

    if upto is None:
        length = None
    else:
        length = upto - offset + 1
    return (offset, length, total)
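
# Illustrative parses (hypothetical values):
#   'bytes 0-499/2000'  -> (0, 500, 2000)
#   'bytes 500-/*'      -> (500, None, None)
#   'bytes */*'         -> (None, None, None)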


def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Raises BadRequest on error.
    """

    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '')

    ret = {}
    permissions = permissions.replace(' ', '')
    if permissions == '':
        return ret
    for perm in (x for x in permissions.split(';')):
        if perm.startswith('read='):
            ret['read'] = list(set(
                [v.replace(' ', '').lower() for v in perm[5:].split(',')]))
            if '' in ret['read']:
                ret['read'].remove('')
            if '*' in ret['read']:
                ret['read'] = ['*']
            if len(ret['read']) == 0:
                raise BadRequest(
                    'Bad X-Object-Sharing header value: invalid length')
        elif perm.startswith('write='):
            ret['write'] = list(set(
                [v.replace(' ', '').lower() for v in perm[6:].split(',')]))
            if '' in ret['write']:
                ret['write'].remove('')
            if '*' in ret['write']:
                ret['write'] = ['*']
            if len(ret['write']) == 0:
                raise BadRequest(
                    'Bad X-Object-Sharing header value: invalid length')
        else:
            raise BadRequest(
                'Bad X-Object-Sharing header value: missing prefix')

    # replace username with uuid
    try:
        ret['read'] = \
            [replace_permissions_username(x) for x in ret.get('read', [])]
        ret['write'] = \
            [replace_permissions_username(x) for x in ret.get('write', [])]
    except ItemNotFound, e:
        raise BadRequest(
            'Bad X-Object-Sharing header value: unknown account: %s' % e)

    # Keep duplicates only in write list.
    dups = [x for x in ret.get(
        'read', []) if x in ret.get('write', []) and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret
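
# Illustrative header and the value built before usernames are translated to
# UUIDs (hypothetical accounts; list order is not guaranteed):
#   'X-Object-Sharing: read=alice,bob; write=devaccount:admins'
#   -> {'read': ['alice', 'bob'], 'write': ['devaccount:admins']}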


def get_public(request):
    """Parse an X-Object-Public header from the request.

    Raises BadRequest on error.
    """

    public = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if public is None:
        return None

    public = public.replace(' ', '').lower()
    if public == 'true':
        return True
    elif public == 'false' or public == '':
        return False
    raise BadRequest('Bad X-Object-Public header value')


def raw_input_socket(request):
    """Return the socket for reading the rest of the request."""

    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software and server_software.startswith('mod_python'):
        return request._req
    if 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise NotImplemented('Unknown server software')

MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB


def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.
    """

    sock = raw_input_socket(request)
    if length < 0:  # Chunked transfers
        # Small version (server does the dechunking).
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
            while length < MAX_UPLOAD_SIZE:
                data = sock.read(blocksize)
                if data == '':
                    return
                yield data
            raise BadRequest('Maximum size is reached')

        # Long version (do the dechunking).
        data = ''
        while length < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    chunk_length += sock.read(1)
                chunk_length.strip()
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length, 16)
            except Exception, e:
                raise BadRequest('Bad chunk size')
                                 # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                chunk_length -= len(chunk)
                if length > 0:
                    length += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2)  # CRLF
        raise BadRequest('Maximum size is reached')
    else:
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                raise BadRequest()
            length -= len(data)
            yield data
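
# The "long version" above dechunks HTTP/1.1 chunked transfer coding by hand:
# each chunk arrives as '<hex-size>[;extensions]\r\n<data>\r\n', and a final
# zero-size chunk marks the end of the body.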


class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way."""

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        if len(self.data) >= length:
            block = self.data[:length]
            self.file.hashmap.append(self.backend.put_block(block))
            self.md5.update(block)
            self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(
            name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.request.backend.block_size)
        return None

    def file_complete(self, file_size):
        l = len(self.data)
        if l > 0:
            self.put_data(l)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file


class ObjectWrapper(object):
    """Return the object's data block by block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1
        self.block = ''

        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        if self.length > 0:
            # Get the file for the current offset.
            file_size = self.sizes[self.file_index]
            while self.offset >= file_size:
                self.offset -= file_size
                self.file_index += 1
                file_size = self.sizes[self.file_index]

            # Get the block for the current position.
            self.block_index = int(self.offset / self.backend.block_size)
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
                self.block_hash = self.hashmaps[
                    self.file_index][self.block_index]
                try:
                    self.block = self.backend.get_block(self.block_hash)
                except ItemNotExists:
                    raise ItemNotFound('Block does not exist')

            # Get the data from the block.
            bo = self.offset % self.backend.block_size
            bs = self.backend.block_size
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
                    self.sizes[self.file_index] % self.backend.block_size):
                bs = self.sizes[self.file_index] % self.backend.block_size
            bl = min(self.length, bs - bo)
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration

    def next(self):
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (
                    self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)
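
# With multiple ranges, next() emits multipart/byteranges delimiters between
# the data parts, roughly (illustrative layout):
#   --<boundary>\r\nContent-Range: bytes 0-499/2000\r\n
#   Content-Transfer-Encoding: binary\r\n\r\n<data>...\r\n--<boundary>--\r\n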


def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data."""

    # Range handling.
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        ret = 200
    else:
        check = [True for offset, length in ranges if
                 length <= 0 or length > size or
                 offset < 0 or offset >= size or
                 offset + length > size]
        if len(check) > 0:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        ret = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # Modification time has passed instead.
                last_modified = parse_http_date(if_range)
                if last_modified != meta['modified']:
                    ranges = [(0, size)]
                    ret = 200
            except ValueError:
                if if_range != meta['checksum']:
                    ranges = [(0, size)]
                    ret = 200

    if ret == 206 and len(ranges) > 1:
        boundary = uuid.uuid4().hex
    else:
        boundary = ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=ret)
    put_object_headers(response, meta, public)
    if ret == 206:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response[
                'Content-Length'] = length  # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (
                offset, offset + length - 1, size)
        else:
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
                boundary,)
    return response


def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset."""

    bi = int(offset / request.backend.block_size)
    bo = offset % request.backend.block_size
    bl = min(len(data), request.backend.block_size - bo)
    if bi < len(hashmap):
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
    else:
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
    return bl  # Return amount of data written.


def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap."""

    # TODO: Search backend for the MD5 of another object with the same hashmap and size...
    md5 = hashlib.md5()
    bs = backend.block_size
    for bi, hash in enumerate(hashmap):
        data = backend.get_block(hash)  # Blocks come in padded.
        if bi == len(hashmap) - 1:
            data = data[:size % bs]
        md5.update(data)
    return md5.hexdigest().lower()


def simple_list_response(request, l):
    if request.serialization == 'text':
        return '\n'.join(l) + '\n'
    if request.serialization == 'xml':
        return render_to_string('items.xml', {'items': l})
    if request.serialization == 'json':
        return json.dumps(l)


from pithos.backends.util import PithosBackendPool
POOL_SIZE = 5


_pithos_backend_pool = PithosBackendPool(size=POOL_SIZE,
                                         db_module=BACKEND_DB_MODULE,
                                         db_connection=BACKEND_DB_CONNECTION,
                                         block_module=BACKEND_BLOCK_MODULE,
                                         block_path=BACKEND_BLOCK_PATH,
                                         block_umask=BACKEND_BLOCK_UMASK,
                                         queue_module=BACKEND_QUEUE_MODULE,
                                         queue_hosts=BACKEND_QUEUE_HOSTS,
                                         queue_exchange=BACKEND_QUEUE_EXCHANGE,
                                         quotaholder_url=QUOTAHOLDER_URL,
                                         quotaholder_token=QUOTAHOLDER_TOKEN,
                                         free_versioning=BACKEND_FREE_VERSIONING)


def get_backend():
    backend = _pithos_backend_pool.pool_get()
    backend.default_policy['quota'] = BACKEND_QUOTA
    backend.default_policy['versioning'] = BACKEND_VERSIONING
    backend.messages = []
    return backend


def update_request_headers(request):
    # Handle URL-encoded keys and values.
    meta = dict([(
        k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
    for k, v in meta.iteritems():
        try:
            k.decode('ascii')
            v.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' in k or '%' in v:
            del(request.META[k])
            request.META[unquote(k)] = smart_unicode(unquote(
                v), strings_only=True)


def update_response_headers(request, response):
    if request.serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif request.serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if (not response.has_header('Content-Length') and
        not (response.has_header('Content-Type') and
             response['Content-Type'].startswith('multipart/byteranges'))):
        response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    meta = response.items()
    for k, v in meta:
        if (k.startswith('X-Account-') or k.startswith('X-Container-') or
                k.startswith('X-Object-') or k.startswith('Content-')):
            del(response[k])
            response[quote(k)] = quote(v, safe='/=,:@; ')


def render_fault(request, fault):
    if isinstance(fault, InternalServerError) and settings.DEBUG:
        fault.details = format_exc(fault)

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response


def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
    """

    if not format_allowed:
        return 'text'

    format = request.GET.get('format')
    if format == 'json':
        return 'json'
    elif format == 'xml':
        return 'xml'

    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        accept, sep, rest = item.strip().partition(';')
        if accept == 'application/json':
            return 'json'
        elif accept == 'application/xml' or accept == 'text/xml':
            return 'xml'

    return 'text'

class User(unicode):
    pass

def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method."""

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                if user_required:
                    token = None
                    if request.method in ('HEAD', 'GET') and COOKIE_NAME in request.COOKIES:
                        cookie_value = unquote(
                            request.COOKIES.get(COOKIE_NAME, ''))
                        account, sep, token = cookie_value.partition('|')
                    get_user(request,
                             AUTHENTICATION_URL, AUTHENTICATION_USERS, token)
                    if getattr(request, 'user', None) is None:
                        raise Unauthorized('Access denied')
                    assert getattr(request, 'user_uniq', None) != None
                    request.user_uniq = User(request.user_uniq)
                    request.user_uniq.uuid = request.user.get('uuid')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(
                    request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault, fault:
                if fault.code >= 500:
                    logger.exception("API Fault")
                return render_fault(request, fault)
            except BaseException, e:
                logger.exception('Unexpected error: %s' % e)
                fault = InternalServerError('Unexpected error: %s' % e)
                return render_fault(request, fault)
            finally:
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator
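
# Illustrative (hypothetical) view using the decorator above:
#
#   @api_method('GET', format_allowed=True)
#   def demo_view(request, v_account):
#       # request.user_uniq, request.serialization and request.backend have
#       # already been filled in by the wrapper at this point.
#       return HttpResponse('OK', status=200)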