Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-app / pithos / api / util.py @ f4fbb0fa

History | View | Annotate | Download (36.6 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify, unhexlify
39
from datetime import datetime, tzinfo, timedelta
40
from urllib import quote, unquote
41

    
42
from django.conf import settings
43
from django.http import HttpResponse
44
from django.template.loader import render_to_string
45
from django.utils import simplejson as json
46
from django.utils.http import http_date, parse_etags
47
from django.utils.encoding import smart_unicode, smart_str
48
from django.core.files.uploadhandler import FileUploadHandler
49
from django.core.files.uploadedfile import UploadedFile
50

    
51
from synnefo.lib.parsedate import parse_http_date_safe, parse_http_date
52
from synnefo.lib.astakos import get_user
53

    
54
from pithos.api.faults import (
55
    Fault, NotModified, BadRequest, Unauthorized, Forbidden, ItemNotFound,
56
    Conflict, LengthRequired, PreconditionFailed, RequestEntityTooLarge,
57
    RangeNotSatisfiable, InternalServerError, NotImplemented)
58
from pithos.api.short_url import encode_url
59
from pithos.api.settings import (BACKEND_DB_MODULE, BACKEND_DB_CONNECTION,
60
                                 BACKEND_BLOCK_MODULE, BACKEND_BLOCK_PATH,
61
                                 BACKEND_BLOCK_UMASK,
62
                                 BACKEND_QUEUE_MODULE, BACKEND_QUEUE_HOSTS,
63
                                 BACKEND_QUEUE_EXCHANGE,
64
                                 BACKEND_QUOTA, BACKEND_VERSIONING,
65
                                 AUTHENTICATION_URL, AUTHENTICATION_USERS,
66
                                 SERVICE_TOKEN, COOKIE_NAME)
67

    
68
from pithos.backends import connect_backend
69
from pithos.backends.base import NotAllowedError, QuotaError, ItemNotExists, VersionNotExists
70

    
71
import logging
72
import re
73
import hashlib
74
import uuid
75
import decimal
76

    
77

    
78
logger = logging.getLogger(__name__)
79

    
80

    
81
class UTC(tzinfo):
    """Concrete tzinfo implementation for Coordinated Universal Time."""

    def utcoffset(self, dt):
        # UTC sits at zero offset from itself, by definition.
        return timedelta(0)

    def tzname(self, dt):
        return 'UTC'

    def dst(self, dt):
        # UTC never observes daylight saving time.
        return timedelta(0)
90

    
91

    
92
def json_encode_decimal(obj):
    """JSON ``default`` hook: serialize Decimal values as strings.

    Raises TypeError for any other type, as the json module expects.
    """
    if not isinstance(obj, decimal.Decimal):
        raise TypeError(repr(obj) + " is not JSON serializable")
    return str(obj)
96

    
97

    
98
def isoformat(d):
    """Serialize datetime *d* as an ISO8601 string that carries a UTC timezone."""
    aware = d.replace(tzinfo=UTC())
    return aware.isoformat()
102

    
103

    
104
def rename_meta_key(d, old, new):
    """Move the value stored under key *old* to key *new*; no-op if absent."""
    if old in d:
        d[new] = d.pop(old)
109

    
110

    
111
def printable_header_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Format 'last_modified' timestamp.
    """
    if d.get('last_modified'):
        # Replace the raw epoch value with an ISO8601 string (mutates d).
        d['last_modified'] = isoformat(
            datetime.fromtimestamp(d['last_modified']))
    return dict((k.lower().replace('-', '_'), v) for k, v in d.items())
122

    
123

    
124
def format_header_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    parts = k.replace('_', '-').split('-')
    return '-'.join(part.capitalize() for part in parts)
127

    
128

    
129
def get_header_prefix(request, prefix):
    """Get all prefix-* request headers in a dict. Reformat keys with format_header_key()."""
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    # TODO: Document or remove '~' replacing.
    collected = {}
    for key, value in request.META.items():
        # len(key) > len(prefix) excludes a bare prefix with no suffix.
        if key.startswith(prefix) and len(key) > len(prefix):
            # key[5:] strips the leading 'HTTP_' before reformatting.
            collected[format_header_key(key[5:])] = value.replace('~', '')
    return collected
135

    
136

    
137
def check_meta_headers(meta):
    """Reject meta dictionaries with too many entries or oversized keys/values.

    Raises BadRequest on violation; returns None otherwise.
    """
    if len(meta) > 90:
        raise BadRequest('Too many headers.')
    for key, value in meta.items():
        if len(key) > 128:
            raise BadRequest('Header name too large.')
        if len(value) > 256:
            raise BadRequest('Header value too large.')
145

    
146

    
147
def get_account_headers(request):
    """Extract account meta and group definitions from the request headers.

    Returns (meta, groups) where groups maps lower-cased group names to
    member lists. Raises BadRequest for invalid group names.
    """
    meta = get_header_prefix(request, 'X-Account-Meta-')
    check_meta_headers(meta)
    groups = {}
    for header, members in get_header_prefix(request, 'X-Account-Group-').items():
        # header[16:] strips the 'X-Account-Group-' prefix.
        name = header[16:].lower()
        if '-' in name or '_' in name:
            raise BadRequest('Bad characters in group name')
        # Split on commas, dropping whitespace and empty entries.
        groups[name] = [m for m in members.replace(' ', '').split(',') if m]
    return meta, groups
159

    
160

    
161
def put_account_headers(response, meta, groups, policy):
    """Fill *response* with account-level headers derived from backend data."""
    if 'count' in meta:
        response['X-Account-Container-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Account-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for key in meta.keys():
        if key.startswith('X-Account-Meta-'):
            response[smart_str(key, strings_only=True)] = smart_str(
                meta[key], strings_only=True)
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(
            int(meta['until_timestamp']))
    for name, members in groups.items():
        header = format_header_key(
            'X-Account-Group-' + smart_str(name, strings_only=True))
        response[header] = smart_str(','.join(members), strings_only=True)
    for key, value in policy.items():
        header = smart_str(format_header_key('X-Account-Policy-' + key),
                           strings_only=True)
        response[header] = smart_str(value, strings_only=True)
180

    
181

    
182
def get_container_headers(request):
    """Extract container meta and policy dictionaries from the request headers."""
    meta = get_header_prefix(request, 'X-Container-Meta-')
    check_meta_headers(meta)
    policy = {}
    for header, value in get_header_prefix(request, 'X-Container-Policy-').items():
        # header[19:] strips the 'X-Container-Policy-' prefix.
        policy[header[19:].lower()] = value.replace(' ', '')
    return meta, policy
187

    
188

    
189
def put_container_headers(request, response, meta, policy):
    """Fill *response* with container-level headers derived from backend data."""
    if 'count' in meta:
        response['X-Container-Object-Count'] = meta['count']
    if 'bytes' in meta:
        response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for key in meta.keys():
        if key.startswith('X-Container-Meta-'):
            response[smart_str(key, strings_only=True)] = smart_str(
                meta[key], strings_only=True)
    # Advertise the distinct X-Object-Meta-* keys present in the container,
    # with the 'X-Object-Meta-' prefix (14 chars) stripped.
    object_meta = [smart_str(x, strings_only=True)
                   for x in meta['object_meta']
                   if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(
        x[14:] for x in object_meta)
    response['X-Container-Block-Size'] = request.backend.block_size
    response['X-Container-Block-Hash'] = request.backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(
            int(meta['until_timestamp']))
    for key, value in policy.items():
        header = smart_str(format_header_key('X-Container-Policy-' + key),
                           strings_only=True)
        response[header] = smart_str(value, strings_only=True)
208

    
209

    
210
def get_object_headers(request):
    """Collect object headers from the request.

    Returns (content_type, meta, sharing, public) where sharing and public
    come from get_sharing()/get_public().
    """
    content_type = request.META.get('CONTENT_TYPE', None)
    meta = get_header_prefix(request, 'X-Object-Meta-')
    check_meta_headers(meta)
    # Promote selected standard headers into the meta dictionary.
    for src, dest in (('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
                      ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
                      ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest')):
        if request.META.get(src):
            meta[dest] = request.META[src]
    return content_type, meta, get_sharing(request), get_public(request)
221

    
222

    
223
def put_object_headers(response, meta, restricted=False):
    """Fill *response* with object-level headers derived from *meta*.

    With restricted=True only a minimal header set is exposed
    (used for public/anonymous access).
    """
    response['ETag'] = meta['checksum']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    if restricted:
        for k in ('Content-Encoding', 'Content-Disposition'):
            if k in meta:
                response[k] = smart_str(meta[k], strings_only=True)
        return
    response['X-Object-Hash'] = meta['hash']
    response['X-Object-UUID'] = meta['uuid']
    response['X-Object-Modified-By'] = smart_str(
        meta['modified_by'], strings_only=True)
    response['X-Object-Version'] = meta['version']
    response['X-Object-Version-Timestamp'] = http_date(
        int(meta['version_timestamp']))
    for k in meta.keys():
        if k.startswith('X-Object-Meta-'):
            response[smart_str(k, strings_only=True)] = smart_str(
                meta[k], strings_only=True)
    for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
              'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
              'X-Object-Public'):
        if k in meta:
            response[k] = smart_str(meta[k], strings_only=True)
249

    
250

    
251
def update_manifest_meta(request, v_account, meta):
    """Update metadata if the object has an X-Object-Manifest.

    Aggregates the sizes and checksums of all objects under the manifest
    prefix into meta['bytes'] and meta['checksum'] (MD5 of the concatenated
    per-object checksums). Best-effort: on any lookup error the meta dict
    is left untouched.
    """
    if 'X-Object-Manifest' not in meta:
        return
    etag = ''
    total_bytes = 0  # avoid shadowing the builtin 'bytes'
    try:
        src_container, src_name = split_container_object_string(
            '/' + meta['X-Object-Manifest'])
        objects = request.backend.list_objects(
            request.user_uniq, v_account,
            src_container, prefix=src_name, virtual=False)
        for x in objects:
            src_meta = request.backend.get_object_meta(
                request.user_uniq, v_account, src_container, x[0],
                'pithos', x[1])
            etag += src_meta['checksum']
            total_bytes += src_meta['bytes']
    except Exception:
        # Ignore errors (best-effort aggregation). Narrowed from a bare
        # ``except:`` so SystemExit/KeyboardInterrupt are not swallowed.
        return
    meta['bytes'] = total_bytes
    md5 = hashlib.md5()
    md5.update(etag)
    meta['checksum'] = md5.hexdigest().lower()
275

    
276

    
277
def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
    """Add X-Object-Sharing / -Shared-By / -Allowed-To entries to *meta*.

    *permissions* is (allowed, perm_path, perms) or None; no-op when None
    or when perms is empty.
    """
    if permissions is None:
        return
    allowed, perm_path, perms = permissions
    if not perms:
        return
    parts = []
    readers = ','.join(perms.get('read', []))
    if readers:
        parts.append('read=' + readers)
    writers = ','.join(perms.get('write', []))
    if writers:
        parts.append('write=' + writers)
    meta['X-Object-Sharing'] = '; '.join(parts)
    # Permissions inherited from another path: record where they come from.
    if perm_path != '/'.join((v_account, v_container, v_object)):
        meta['X-Object-Shared-By'] = perm_path
    # For non-owners, expose which access kinds they were granted.
    if request.user_uniq != v_account:
        meta['X-Object-Allowed-To'] = allowed
295

    
296

    
297
def update_public_meta(public, meta):
    """Expose the object's public URL in *meta* when it is published."""
    if public:
        meta['X-Object-Public'] = '/public/' + encode_url(public)
301

    
302

    
303
def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set.

    Raises NotModified / PreconditionFailed accordingly; returns None when
    the preconditions hold or no 'modified' value is available.
    """
    if 'modified' not in meta:
        return  # TODO: Always return?
    modified = int(meta['modified'])

    ims = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if ims is not None:
        ims = parse_http_date_safe(ims)
    if ims is not None and modified <= ims:
        raise NotModified('Resource has not been modified')

    ius = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if ius is not None:
        ius = parse_http_date_safe(ius)
    if ius is not None and modified > ius:
        raise PreconditionFailed('Resource has been modified')
320

    
321

    
322
def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set.

    Raises PreconditionFailed / NotModified accordingly; returns None when
    all preconditions hold.
    """
    # An empty checksum means "no ETag available".
    etag = meta['checksum'] or None

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None:
        if etag is None:
            raise PreconditionFailed('Resource does not exist')
        if if_match != '*':
            if etag not in (x.lower() for x in parse_etags(if_match)):
                raise PreconditionFailed('Resource ETag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None and etag is not None:
        # TODO: If this passes, must ignore If-Modified-Since header.
        matched = (if_none_match == '*' or
                   etag in (x.lower() for x in parse_etags(if_none_match)))
        if matched:
            # TODO: Continue if an If-Modified-Since header is present.
            if request.method in ('HEAD', 'GET'):
                raise NotModified('Resource ETag matches')
            raise PreconditionFailed('Resource exists or ETag matches')
345

    
346

    
347
def split_container_object_string(s):
    """Split '/container/object' into (container, object).

    The object part may itself contain slashes. Raises ValueError when the
    leading slash is missing, no separator exists, or the object is empty.
    """
    if not s.startswith('/'):
        raise ValueError
    container, sep, obj = s[1:].partition('/')
    if not sep or not obj:
        raise ValueError
    return container, obj
355

    
356

    
357
def copy_or_move_object(request, src_account, src_container, src_name, dest_account, dest_container, dest_name, move=False, delimiter=None):
    """Copy or move an object.

    Translates backend exceptions into the corresponding API faults and
    returns the new version id of the destination object. When *public* is
    given via the request headers, the destination's public setting is
    updated afterwards.
    """

    # Honor an explicit request to ignore the supplied content type.
    if 'ignore_content_type' in request.GET and 'CONTENT_TYPE' in request.META:
        del(request.META['CONTENT_TYPE'])
    content_type, meta, permissions, public = get_object_headers(request)
    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        if move:
            version_id = request.backend.move_object(
                request.user_uniq, src_account, src_container, src_name,
                dest_account, dest_container, dest_name,
                content_type, 'pithos', meta, False, permissions, delimiter)
        else:
            version_id = request.backend.copy_object(
                request.user_uniq, src_account, src_container, src_name,
                dest_account, dest_container, dest_name,
                content_type, 'pithos', meta, False, permissions, src_version, delimiter)
    except NotAllowedError:
        raise Forbidden('Not allowed')
    except (ItemNotExists, VersionNotExists):
        raise ItemNotFound('Container or object does not exist')
    except ValueError:
        # NOTE(review): the fault message implies ValueError comes from
        # sharing/permissions validation in the backend — confirm.
        raise BadRequest('Invalid sharing header')
    except QuotaError:
        raise RequestEntityTooLarge('Quota exceeded')
    if public is not None:
        # Apply the requested public setting to the destination object.
        try:
            request.backend.update_object_public(request.user_uniq, dest_account, dest_container, dest_name, public)
        except NotAllowedError:
            raise Forbidden('Not allowed')
        except ItemNotExists:
            raise ItemNotFound('Object does not exist')
    return version_id
391

    
392

    
393
def get_int_parameter(p):
    """Parse *p* as a non-negative integer.

    Returns the parsed int, or None when *p* is None, unparsable or negative.
    """
    if p is None:
        return None
    try:
        value = int(p)
    except ValueError:
        return None
    return value if value >= 0 else None
402

    
403

    
404
def get_content_length(request):
    """Return the request's Content-Length as a non-negative int.

    Raises LengthRequired when the header is missing or invalid.
    """
    length = get_int_parameter(request.META.get('CONTENT_LENGTH'))
    if length is None:
        raise LengthRequired('Missing or invalid Content-Length header')
    return length
409

    
410

    
411
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.
    """
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compile once, outside the loop (was re-compiled per range spec);
    # raw string so backslashes reach the regex engine untouched.
    spec = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        m = spec.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                ret.append((offset, upto - offset + 1))
            else:
                # Open-ended range: everything from offset to the end.
                ret.append((offset, size - offset))
        else:
            # Suffix range: the last 'upto' bytes.
            length = int(upto)
            ret.append((size - length, length))

    return ret
447

    
448

    
449
def get_content_range(request):
450
    """Parse a Content-Range header from the request.
451

452
    Either returns None, when the header is not existent or should be ignored,
453
    or an (offset, length, total) tuple - check as length, total may be None.
454
    Returns (None, None, None) if the provided range is '*/*'.
455
    """
456

    
457
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
458
    if not ranges:
459
        return None
460

    
461
    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
462
    m = p.match(ranges)
463
    if not m:
464
        if ranges == 'bytes */*':
465
            return (None, None, None)
466
        return None
467
    offset = int(m.group('offset'))
468
    upto = m.group('upto')
469
    total = m.group('total')
470
    if upto != '':
471
        upto = int(upto)
472
    else:
473
        upto = None
474
    if total != '*':
475
        total = int(total)
476
    else:
477
        total = None
478
    if (upto is not None and offset > upto) or \
479
        (total is not None and offset >= total) or \
480
            (total is not None and upto is not None and upto >= total):
481
        return None
482

    
483
    if upto is None:
484
        length = None
485
    else:
486
        length = upto - offset + 1
487
    return (offset, length, total)
488

    
489

    
490
def _parse_user_list(value):
    """Parse a comma-separated user list for X-Object-Sharing.

    Deduplicates, lower-cases, drops empty entries and collapses to ['*']
    when a wildcard is present. Raises BadRequest when nothing remains.
    """
    users = list(set(v.replace(' ', '').lower() for v in value.split(',')))
    if '' in users:
        users.remove('')
    if '*' in users:
        users = ['*']
    if len(users) == 0:
        raise BadRequest('Bad X-Object-Sharing header value')
    return users


def get_sharing(request):
    """Parse an X-Object-Sharing header from the request.

    Raises BadRequest on error.
    """
    permissions = request.META.get('HTTP_X_OBJECT_SHARING')
    if permissions is None:
        return None

    # TODO: Document or remove '~' replacing.
    permissions = permissions.replace('~', '')

    ret = {}
    permissions = permissions.replace(' ', '')
    if permissions == '':
        return ret
    # The read= / write= branches previously duplicated the same parsing
    # logic inline; factored out into _parse_user_list.
    for perm in permissions.split(';'):
        if perm.startswith('read='):
            ret['read'] = _parse_user_list(perm[5:])
        elif perm.startswith('write='):
            ret['write'] = _parse_user_list(perm[6:])
        else:
            raise BadRequest('Bad X-Object-Sharing header value')

    # Keep duplicates only in write list.
    dups = [x for x in ret.get(
        'read', []) if x in ret.get('write', []) and x != '*']
    if dups:
        for x in dups:
            ret['read'].remove(x)
        if len(ret['read']) == 0:
            del(ret['read'])

    return ret
539

    
540

    
541
def get_public(request):
    """Parse an X-Object-Public header from the request.

    Raises BadRequest on error.
    """
    header = request.META.get('HTTP_X_OBJECT_PUBLIC')
    if header is None:
        return None

    value = header.replace(' ', '').lower()
    if value == 'true':
        return True
    if value in ('false', ''):
        return False
    raise BadRequest('Bad X-Object-Public header value')
557

    
558

    
559
def raw_input_socket(request):
    """Return the socket for reading the rest of the request.

    Raises NotImplemented (the API fault) for unknown server software.
    """
    software = request.META.get('SERVER_SOFTWARE')
    if software and software.startswith('mod_python'):
        return request._req
    try:
        return request.environ['wsgi.input']
    except KeyError:
        raise NotImplemented('Unknown server software')
568

    
569
MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024)  # 5GB cap enforced by socket_read_iterator
570

    
571

    
572
def socket_read_iterator(request, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
    """

    sock = raw_input_socket(request)
    if length < 0:  # Chunked transfers
        # Small version (server does the dechunking).
        if request.environ.get('mod_wsgi.input_chunked', None) or request.META['SERVER_SOFTWARE'].startswith('gunicorn'):
            # NOTE(review): 'length' is never updated in this loop, so the
            # MAX_UPLOAD_SIZE bound is never hit here; the loop only ends at
            # EOF ('' read) — confirm whether that is intended.
            while length < MAX_UPLOAD_SIZE:
                data = sock.read(blocksize)
                if data == '':
                    return
                yield data
            raise BadRequest('Maximum size is reached')

        # Long version (do the dechunking).
        data = ''
        while length < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                # No readline(): assemble the size line one byte at a time.
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    chunk_length += sock.read(1)
                # NOTE(review): str.strip() returns a new string; its result
                # is discarded here, so the newline is NOT removed — int(x, 16)
                # below tolerates surrounding whitespace, which masks this.
                chunk_length.strip()
            # Drop any chunk extension after ';' (HTTP chunked coding).
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                # Chunk sizes are hexadecimal per the chunked coding.
                chunk_length = int(chunk_length, 16)
            except Exception, e:
                raise BadRequest('Bad chunk size')
                                 # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                chunk_length -= len(chunk)
                # NOTE(review): 'length' starts negative in this branch, so
                # this guard is never true and the running total is never
                # accumulated — the MAX_UPLOAD_SIZE check above is therefore
                # ineffective; looks like a latent bug, confirm intent.
                if length > 0:
                    length += len(chunk)
                data += chunk
                # Emit in blocksize-sized pieces.
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2)  # CRLF
        raise BadRequest('Maximum size is reached')
    else:
        # Known Content-Length: stream exactly 'length' bytes.
        if length > MAX_UPLOAD_SIZE:
            raise BadRequest('Maximum size is reached')
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                # Premature EOF before 'length' bytes were delivered.
                raise BadRequest()
            length -= len(data)
            yield data
636

    
637

    
638
class SaveToBackendHandler(FileUploadHandler):
    """Handle a file from an HTML form the django way.

    Buffers incoming data and flushes it to the storage backend one block
    at a time, building the file's hashmap and MD5 checksum as it goes.
    """

    def __init__(self, request=None):
        super(SaveToBackendHandler, self).__init__(request)
        self.backend = request.backend

    def put_data(self, length):
        # Flush one block of exactly 'length' bytes to the backend once
        # enough data has been buffered; otherwise keep accumulating.
        if len(self.data) < length:
            return
        block = self.data[:length]
        self.file.hashmap.append(self.backend.put_block(block))
        self.md5.update(block)
        self.data = self.data[length:]

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        # Reset all per-file state for the next uploaded file.
        self.md5 = hashlib.md5()
        self.data = ''
        self.file = UploadedFile(
            name=file_name, content_type=content_type, charset=charset)
        self.file.size = 0
        self.file.hashmap = []

    def receive_data_chunk(self, raw_data, start):
        self.data += raw_data
        self.file.size += len(raw_data)
        self.put_data(self.request.backend.block_size)
        # Returning None tells Django the chunk was consumed.
        return None

    def file_complete(self, file_size):
        # Flush whatever is left as a final (possibly short) block.
        remaining = len(self.data)
        if remaining > 0:
            self.put_data(remaining)
        self.file.etag = self.md5.hexdigest().lower()
        return self.file
672

    
673

    
674
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry of the range list.
    """

    def __init__(self, backend, ranges, sizes, hashmaps, boundary):
        # backend: storage backend used to fetch blocks by hash.
        # ranges: list of (offset, length) pairs over the concatenated files.
        # sizes / hashmaps: per-file byte sizes and per-file block-hash lists.
        # boundary: MIME boundary used when serving multiple ranges.
        self.backend = backend
        self.ranges = ranges
        self.sizes = sizes
        self.hashmaps = hashmaps
        self.boundary = boundary
        self.size = sum(self.sizes)

        # Current read cursor: which file, which block, plus a one-block cache.
        self.file_index = 0
        self.block_index = 0
        self.block_hash = -1  # sentinel meaning "no block cached yet"
        self.block = ''

        # -1 means no multipart part header has been emitted yet.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        # Return up to one block worth of data for the current range,
        # or raise StopIteration once the range is exhausted.
        if self.length > 0:
            # Get the file for the current offset.
            file_size = self.sizes[self.file_index]
            while self.offset >= file_size:
                self.offset -= file_size
                self.file_index += 1
                file_size = self.sizes[self.file_index]

            # Get the block for the current position.
            self.block_index = int(self.offset / self.backend.block_size)
            # Only refetch when the hash differs from the cached block's.
            if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
                self.block_hash = self.hashmaps[
                    self.file_index][self.block_index]
                try:
                    self.block = self.backend.get_block(self.block_hash)
                except ItemNotExists:
                    raise ItemNotFound('Block does not exist')

            # Get the data from the block.
            bo = self.offset % self.backend.block_size
            bs = self.backend.block_size
            # The final block of a file may be shorter than block_size.
            if (self.block_index == len(self.hashmaps[self.file_index]) - 1 and
                    self.sizes[self.file_index] % self.backend.block_size):
                bs = self.sizes[self.file_index] % self.backend.block_size
            bl = min(self.length, bs - bo)
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration

    def next(self):
        # Python 2 iterator protocol: return the next chunk of the reply.
        # Single range: stream raw data with no multipart framing.
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            # range_index == -1 deliberately raises so the first part header
            # is emitted before any data.
            if self.range_index == -1:
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            # Current range exhausted: advance and emit either the next
            # part header or the closing multipart footer.
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                self.file_index = 0
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (
                    self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)
763

    
764

    
765
def object_data_response(request, sizes, hashmaps, meta, public=False):
    """Get the HttpResponse object for replying with the object's data.

    Handles HTTP Range/If-Range semantics: returns 200 with the whole
    object, or 206 with one range (Content-Range header) or several
    ranges (multipart/byteranges body).
    """
    size = sum(sizes)
    ranges = get_range(request, size)
    if ranges is None:
        status = 200
        ranges = [(0, size)]
    else:
        # Reject the request if any range falls outside the object.
        out_of_bounds = any(length <= 0 or length > size or
                            offset < 0 or offset >= size or
                            offset + length > size
                            for offset, length in ranges)
        if out_of_bounds:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        status = 206
        if_range = request.META.get('HTTP_IF_RANGE')
        if if_range:
            try:
                # An HTTP date: serve ranges only if unmodified since then.
                if parse_http_date(if_range) != meta['modified']:
                    status = 200
                    ranges = [(0, size)]
            except ValueError:
                # Otherwise treat If-Range as an entity tag (checksum).
                if if_range != meta['checksum']:
                    status = 200
                    ranges = [(0, size)]

    multipart = status == 206 and len(ranges) > 1
    boundary = uuid.uuid4().hex if multipart else ''
    wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
    response = HttpResponse(wrapper, status=status)
    put_object_headers(response, meta, public)
    if status == 206:
        if multipart:
            # Length of a streamed multipart body is unknown in advance.
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (
                boundary,)
        else:
            offset, length = ranges[0]
            response[
                'Content-Length'] = length  # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (
                offset, offset + length - 1, size)
    return response
def put_object_block(request, hashmap, data, offset):
    """Put one block of data at the given offset.

    Updates the existing block when *offset* falls inside the current
    hashmap, otherwise appends a new block zero-padded up to the
    in-block offset.  Returns the number of bytes actually written
    (never past the current block's border).
    """
    bs = request.backend.block_size
    bi = offset // bs  # Floor division: int(offset / bs) loses precision
                       # for huge offsets under true division.
    bo = offset % bs
    bl = min(len(data), bs - bo)
    if bi < len(hashmap):
        hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
    else:
        hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
    return bl  # Return amount of data written.
def hashmap_md5(backend, hashmap, size):
    """Produce the MD5 sum from the data in the hashmap.

    backend -- provides block_size and get_block(hash)
    hashmap -- list of block hashes making up the object
    size -- total object size, used to trim padding off the last block
    """
    # TODO: Search backend for the MD5 of another object with the same
    #       hashmap and size...
    md5 = hashlib.md5()
    bs = backend.block_size
    for bi, block_hash in enumerate(hashmap):
        data = backend.get_block(block_hash)  # Blocks come in padded.
        if bi == len(hashmap) - 1:
            # Trim padding from the final block.  When size is an exact
            # multiple of the block size the remainder is 0, and the old
            # data[:size % bs] wrongly dropped the whole last block.
            data = data[:size % bs or bs]
        md5.update(data)
    return md5.hexdigest().lower()
def simple_list_response(request, l):
    """Serialize the flat list *l* per the request's serialization format.

    Supports 'text', 'xml' and 'json'; returns None for anything else.
    """
    fmt = request.serialization
    if fmt == 'text':
        return '%s\n' % '\n'.join(l)
    if fmt == 'xml':
        return render_to_string('items.xml', {'items': l})
    if fmt == 'json':
        return json.dumps(l)
def _get_backend():
    """Create a new backend instance configured from the app settings."""
    backend = connect_backend(db_module=BACKEND_DB_MODULE,
                              db_connection=BACKEND_DB_CONNECTION,
                              block_module=BACKEND_BLOCK_MODULE,
                              block_path=BACKEND_BLOCK_PATH,
                              block_umask=BACKEND_BLOCK_UMASK,
                              queue_module=BACKEND_QUEUE_MODULE,
                              queue_hosts=BACKEND_QUEUE_HOSTS,
                              queue_exchange=BACKEND_QUEUE_EXCHANGE)
    # Policy defaults applied by the backend to newly created entities.
    for policy_key, policy_value in (('quota', BACKEND_QUOTA),
                                     ('versioning', BACKEND_VERSIONING)):
        backend.default_policy[policy_key] = policy_value
    return backend
def _pooled_backend_close(backend):
868
    backend._pool.pool_put(backend)
869

    
870

    
871
from synnefo.lib.pool import ObjectPool
872
from new import instancemethod
873

    
874
USAGE_LIMIT = 500
875
POOL_SIZE = 5
876

    
877
class PithosBackendPool(ObjectPool):
    """Object pool of Pithos backend instances.

    Each pooled backend has its close() patched to recycle the instance
    back into the pool; the genuine close is kept as _real_close and is
    invoked once the instance has been used USAGE_LIMIT times.
    """

    def _pool_create(self):
        backend = _get_backend()
        # Keep the real close around, then make close() return the
        # instance to this pool instead of tearing it down.
        backend._real_close = backend.close
        backend.close = instancemethod(_pooled_backend_close, backend,
                                       type(backend))
        backend._pool = self
        backend._use_count = USAGE_LIMIT
        return backend

    def _pool_cleanup(self, backend):
        remaining = backend._use_count - 1
        if remaining < 0:
            # Used up: really close and tell the pool to discard it.
            backend._real_close()
            return True
        backend._use_count = remaining
        return False
_pithos_backend_pool = PithosBackendPool(size=POOL_SIZE)
897

    
898

    
899
def get_backend():
    """Check a backend instance out of the shared pool.

    Callers must close() it, which puts it back into the pool.
    """
    return _pithos_backend_pool.pool_get()
def update_request_headers(request):
    """Validate and URL-decode incoming HTTP_* headers in request.META.

    Raises BadRequest if any header key or value contains non-ASCII
    characters.  Percent-encoded keys/values are replaced in place by
    their decoded (unicode) forms.
    """
    # Handle URL-encoded keys and values.
    meta = dict([(
        k, v) for k, v in request.META.iteritems() if k.startswith('HTTP_')])
    for k, v in meta.iteritems():
        try:
            # Reject any non-ASCII bytes before unquoting.
            k.decode('ascii')
            v.decode('ascii')
        except UnicodeDecodeError:
            raise BadRequest('Bad character in headers.')
        if '%' in k or '%' in v:
            # Replace the percent-encoded header with its decoded form.
            del(request.META[k])
            request.META[unquote(k)] = smart_unicode(unquote(
                v), strings_only=True)
def update_response_headers(request, response):
    """Set default Content-Type/Length and URL-encode outgoing headers."""
    serialization_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    if request.serialization in serialization_types:
        response['Content-Type'] = serialization_types[request.serialization]
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    # Multipart byteranges bodies are streamed, so their total length is
    # not known up front and must not be set here.
    is_multipart = (response.has_header('Content-Type') and
                    response['Content-Type'].startswith(
                        'multipart/byteranges'))
    if not response.has_header('Content-Length') and not is_multipart:
        response['Content-Length'] = len(response.content)

    # URL-encode unicode in headers.
    encoded_prefixes = ('X-Account-', 'X-Container-', 'X-Object-',
                        'Content-')
    snapshot = response.items()
    for header, value in snapshot:
        if header.startswith(encoded_prefixes):
            del(response[header])
            response[quote(header)] = quote(value, safe='/=,:@; ')
def render_fault(request, fault):
    """Build a plain-text HTTP error response describing *fault*.

    In DEBUG mode, internal server errors get the current traceback
    appended to their details.
    """
    if isinstance(fault, InternalServerError) and settings.DEBUG:
        # Bug fix: format_exc() takes an optional *limit* argument, not
        # the exception object; passing `fault` produced no usable
        # traceback.  Called from an except block, so the current
        # exception is still available to format_exc().
        fault.details = format_exc()

    request.serialization = 'text'
    data = fault.message + '\n'
    if fault.details:
        data += '\n' + fault.details
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
    """
    if not format_allowed:
        return 'text'

    # Explicit ?format= query parameter wins over content negotiation.
    explicit = request.GET.get('format')
    if explicit in ('json', 'xml'):
        return explicit

    # Fall back to the Accept header; parameters after ';' are ignored.
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        media_type = item.strip().partition(';')[0]
        if media_type == 'application/json':
            return 'json'
        if media_type in ('application/xml', 'text/xml'):
            return 'xml'

    return 'text'
def api_method(http_method=None, format_allowed=False, user_required=True):
    """Decorator function for views that implement an API method.

    http_method -- if given, reject requests using any other HTTP method
    format_allowed -- whether 'json'/'xml' serializations may be requested
    format_allowed is passed through to request_serialization();
    user_required -- whether an authenticated user is mandatory
    """

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                if user_required:
                    token = None
                    # For safe methods, also accept a token carried in the
                    # auth cookie (value format: "<user>|<token>").
                    if request.method in ('HEAD', 'GET') and COOKIE_NAME in request.COOKIES:
                        cookie_value = unquote(
                            request.COOKIES.get(COOKIE_NAME, ''))
                        if cookie_value and '|' in cookie_value:
                            token = cookie_value.split('|', 1)[1]
                    get_user(request,
                             AUTHENTICATION_URL, AUTHENTICATION_USERS, token)
                    if  getattr(request, 'user', None) is None:
                        raise Unauthorized('Access denied')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Format and check headers.
                update_request_headers(request)

                # Fill in custom request variables.
                request.serialization = request_serialization(
                    request, format_allowed)
                request.backend = get_backend()

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault, fault:
                # Expected API faults map directly to HTTP error responses.
                return render_fault(request, fault)
            except BaseException, e:
                # Anything else is an internal error: log with traceback.
                logger.exception('Unexpected error: %s' % e)
                fault = InternalServerError('Unexpected error: %s' % e)
                return render_fault(request, fault)
            finally:
                # Always return the pooled backend, even on errors.
                if getattr(request, 'backend', None) is not None:
                    request.backend.close()
        return wrapper
    return decorator