Statistics
| Branch: | Tag: | Revision:

root / pithos / api / util.py @ 83dd59c5

History | View | Annotate | Download (21.8 kB)

1
# Copyright 2011 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify
39

    
40
from django.conf import settings
41
from django.http import HttpResponse
42
from django.utils.http import http_date, parse_etags
43

    
44
from pithos.api.compat import parse_http_date_safe
45
from pithos.api.faults import (Fault, NotModified, BadRequest, ItemNotFound, LengthRequired,
46
                                PreconditionFailed, RangeNotSatisfiable, ServiceUnavailable)
47
from pithos.backends import backend
48

    
49
import datetime
50
import logging
51
import re
52
import hashlib
53
import uuid
54

    
55

    
56
# Module-level logger, named after this module (__name__).
logger = logging.getLogger(__name__)
57

    
58

    
59
def printable_meta_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Replace the 'modified' key coming from the backend (a value accepted
    by int(), used as a Unix timestamp) with 'last_modified', formatted as
    an ISO 8601 string via datetime.fromtimestamp().

    The dictionary passed in is not altered; a new dictionary is returned.
    """

    # Work on a shallow copy so that the caller's dictionary keeps its
    # original 'modified' entry.
    meta = dict(d)
    if 'modified' in meta:
        modified = int(meta.pop('modified'))
        meta['last_modified'] = datetime.datetime.fromtimestamp(modified).isoformat()
    return dict((k.lower().replace('-', '_'), v) for k, v in meta.items())
70

    
71
def format_meta_key(k):
    """Return the key with underscores turned into dashes and every
    dash-separated word capitalized (e.g. 'x_foo_bar' -> 'X-Foo-Bar')."""

    words = []
    for word in k.replace('_', '-').split('-'):
        words.append(word.capitalize())
    return '-'.join(words)
75

    
76
def get_meta_prefix(request, prefix):
    """Return a dict with every request header that starts with 'prefix'.

    Headers are looked up in request.META under their 'HTTP_'-prefixed,
    upper-case, underscore-separated names. The keys of the returned dict
    are rebuilt with format_meta_key() after dropping the 'HTTP_' part.
    """

    wanted = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for name, value in request.META.items():
        if name.startswith(wanted):
            # name[5:] strips the leading 'HTTP_'.
            headers[format_meta_key(name[5:])] = value
    return headers
81

    
82
def get_account_meta(request):
    """Return the 'X-Account-Meta-*' headers of an account request as a dict."""

    return get_meta_prefix(request, 'X-Account-Meta-')
87

    
88
def put_account_meta(response, meta):
    """Copy account metadata into the headers of a response.

    'count' and 'bytes' are required keys of 'meta'; 'modified' and
    'until_timestamp' are optional. Every 'X-Account-Meta-*' entry is
    copied with both its name and value encoded as UTF-8.
    """

    response['X-Account-Container-Count'] = meta['count']
    response['X-Account-Bytes-Used'] = meta['bytes']
    if 'modified' in meta:
        response['Last-Modified'] = http_date(int(meta['modified']))
    for name, value in meta.items():
        if name.startswith('X-Account-Meta-'):
            response[name.encode('utf-8')] = value.encode('utf-8')
    if 'until_timestamp' in meta:
        response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
99

    
100
def get_container_meta(request):
    """Return the 'X-Container-Meta-*' headers of a container request as a dict."""

    return get_meta_prefix(request, 'X-Container-Meta-')
105

    
106
def put_container_meta(response, meta):
    """Put metadata in a container response.

    'count', 'bytes', 'modified' and 'object_meta' are required keys of
    'meta'; 'until_timestamp' is optional. Every 'X-Container-Meta-*' entry
    is copied with both its name and value encoded as UTF-8. The block size
    and hash algorithm reported are the ones exposed by the backend.
    """

    response['X-Container-Object-Count'] = meta['count']
    response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
        response[k.encode('utf-8')] = meta[k].encode('utf-8')
    # A header value has to be a string. Send the object meta names (with
    # the 14-character 'X-Object-Meta-' prefix removed) as a comma-separated
    # list instead of relying on how the response stringifies a Python list.
    object_meta = [x[14:] for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(object_meta)
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm
    if 'until_timestamp' in meta:
        response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
119

    
120
def get_object_meta(request):
    """Return the metadata carried by an object request as a dict.

    Besides the 'X-Object-Meta-*' headers, a handful of standard and
    object-specific headers are included whenever they are present with a
    non-empty value.
    """

    # (key in request.META, key in the returned dict)
    passthrough = (
        ('CONTENT_TYPE', 'Content-Type'),
        ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
        ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'),
        ('HTTP_X_OBJECT_PUBLIC', 'X-Object-Public'),
    )

    meta = get_meta_prefix(request, 'X-Object-Meta-')
    for environ_key, header in passthrough:
        value = request.META.get(environ_key)
        if value:
            meta[header] = value
    return meta
135

    
136
def put_object_meta(response, meta, public=False):
    """Copy object metadata into the headers of a response.

    'hash', 'bytes' and 'modified' are always required in 'meta'. When
    'public' is false, 'version' and 'version_timestamp' are required too
    and the 'X-Object-Meta-*' entries plus 'X-Object-Public' are exposed;
    when 'public' is true these are left out.
    """

    response['ETag'] = meta['hash']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))

    if public:
        optional = ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest')
    else:
        response['X-Object-Version'] = meta['version']
        response['X-Object-Version-Timestamp'] = meta['version_timestamp']
        for name, value in meta.items():
            if name.startswith('X-Object-Meta-'):
                response[name.encode('utf-8')] = value.encode('utf-8')
        optional = ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest', 'X-Object-Public')

    # Optional headers are only sent when the backend has a value for them.
    for name in optional:
        if name in meta:
            response[name] = meta[name]
155

    
156
def validate_modification_preconditions(request, meta):
    """Check the 'modified' timestamp against the date preconditions.

    Raises NotModified when If-Modified-Since holds a parseable date that
    is not older than the resource, and PreconditionFailed when
    If-Unmodified-Since holds a parseable date older than the resource.
    Headers that are missing or that parse_http_date_safe() maps to None
    are ignored. Nothing is checked when 'meta' has no 'modified' key.
    """

    if 'modified' not in meta:
        return # TODO: Always return?

    since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if since is not None:
        since = parse_http_date_safe(since)
        if since is not None and int(meta['modified']) <= since:
            raise NotModified('Resource has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        unmodified_since = parse_http_date_safe(unmodified_since)
        if unmodified_since is not None and int(meta['modified']) > unmodified_since:
            raise PreconditionFailed('Resource has been modified')
173

    
174
def validate_matching_preconditions(request, meta):
    """Check the resource hash against the ETag preconditions.

    Raises PreconditionFailed when If-Match is given (and is not '*') but
    does not list the hash, and NotModified when If-None-Match is '*' or
    lists the hash. ETags from the headers are lower-cased before the
    comparison. Nothing is checked when 'meta' has no 'hash' key.
    """

    if 'hash' not in meta:
        return # TODO: Always return?

    def listed_etags(header):
        # ETags named by the header value, lower-cased.
        return [etag.lower() for etag in parse_etags(header)]

    etag = meta['hash']

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None and if_match != '*':
        if etag not in listed_etags(if_match):
            raise PreconditionFailed('Resource Etag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None:
        if if_none_match == '*' or etag in listed_etags(if_none_match):
            raise NotModified('Resource Etag matches')
189

    
190
def split_container_object_string(s):
    """Split a '/container/object' path into (container, object).

    The object part may itself contain slashes. ValueError is raised when
    the string does not start with '/' or has no second '/'.
    """

    pieces = s.split('/', 2)
    if len(pieces) < 3:
        raise ValueError
    leading, container, name = pieces
    if leading != '':
        raise ValueError
    return container, name
195

    
196
def copy_or_move_object(request, v_account, src_container, src_name, dest_container, dest_name, move=False):
    """Copy an object to a new location, or move it when 'move' is true.

    The metadata for the destination comes from the request. The source's
    'hash' is always carried over, and so is its 'Content-Type' unless the
    request supplies a new one. The source version is taken from the
    X-Source-Version header. A NameError coming out of the backend calls is
    reported as ItemNotFound.
    """

    meta = get_object_meta(request)
    try:
        src_meta = backend.get_object_meta(request.user, v_account, src_container, src_name)
    except NameError:
        raise ItemNotFound('Container or object does not exist')

    # A 'Content-Type' given in the request wins over the stored one.
    if 'Content-Type' in meta:
        src_meta.pop('Content-Type', None)
    for key in ('Content-Type', 'hash'):
        if key in src_meta:
            meta[key] = src_meta[key]

    src_version = request.META.get('HTTP_X_SOURCE_VERSION')
    try:
        operation = backend.move_object if move else backend.copy_object
        operation(request.user, v_account, src_container, src_name, dest_container, dest_name, meta, True, src_version)
    except NameError:
        raise ItemNotFound('Container or object does not exist')
219

    
220
def get_int_parameter(request, name):
    """Return the query parameter 'name' as a non-negative integer.

    None is returned when the parameter is missing, is not an integer or
    is negative.
    """

    raw = request.GET.get(name)
    if raw is None:
        return None
    try:
        value = int(raw)
    except ValueError:
        return None
    return value if value >= 0 else None
230

    
231
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises LengthRequired when the header is missing or empty, and
    BadRequest when it is not an integer or is negative.
    """

    raw = request.META.get('CONTENT_LENGTH')
    if not raw:
        raise LengthRequired('Missing Content-Length header')
    try:
        length = int(raw)
    except ValueError:
        raise BadRequest('Invalid Content-Length header')
    if length < 0:
        raise BadRequest('Invalid Content-Length header')
    return length
242

    
243
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    'size' is the total size of the object. Following the HTTP byte-range
    rules, a last-byte position at or beyond the end of the object is taken
    to be the last byte, and a suffix range longer than the object selects
    the whole object. An offset at or beyond 'size' is NOT rejected here;
    it yields a tuple that the caller is expected to refuse.
    """

    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compiled once, outside the loop over the individual range specs.
    pattern = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')

    ret = []
    for r in ranges[6:].split(','):
        m = pattern.match(r.strip())
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                # 'first-last': never reach past the last byte.
                upto = min(upto, size - 1)
                ret.append((offset, upto - offset + 1))
            else:
                # 'first-': everything from the offset to the end.
                ret.append((offset, size - offset))
        else:
            # '-suffix': the last 'suffix' bytes, at most the whole object.
            length = min(int(upto), size)
            ret.append((size - length, length))

    return ret
279

    
280
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Returns None when the header is absent, malformed or inconsistent.
    Returns (None, None, None) for the literal value 'bytes */*'.
    Otherwise returns an (offset, length, total) tuple, where 'length' is
    None when no end position was given and 'total' is None when the total
    was given as '*'.
    """

    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None
    if header == 'bytes */*':
        return (None, None, None)

    match = re.match(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if match is None:
        return None

    offset = int(match.group('offset'))
    upto_text = match.group('upto')
    total_text = match.group('total')
    upto = int(upto_text) if upto_text != '' else None
    total = int(total_text) if total_text != '*' else None

    # Reject ranges that end before they start or reach past the total.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
319

    
320
def raw_input_socket(request):
    """Return the object to read the rest of the request from.

    The choice depends on the SERVER_SOFTWARE entry of request.META:
    'WSGIServer*' (or no entry at all, as long as 'wsgi.input' is in the
    environ) gives the WSGI input stream, 'mod_python*' gives request._req.
    Anything else raises ServiceUnavailable.
    """

    server_software = request.META.get('SERVER_SOFTWARE')
    if server_software:
        if server_software.startswith('WSGIServer'):
            return request.environ['wsgi.input']
        if server_software.startswith('mod_python'):
            return request._req
    elif 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise ServiceUnavailable('Unknown server software')
333

    
334
MAX_UPLOAD_SIZE = 10 * (1024 * 1024) # 10MB


def socket_read_iterator(sock, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read in a chunked transfer is controlled by
    MAX_UPLOAD_SIZE.

    'sock' needs a read(n) method; readline() is used for the chunk size
    lines when available. BadRequest is raised when a chunk size line cannot
    be parsed or when the stream ends in the middle of a chunk. In a
    fixed-length read, iteration simply stops if the stream ends early, so
    the caller may compare the amount received against 'length'.
    """

    if length < 0: # Chunked transfers
        data = ''
        received = 0 # Bytes of chunk data read so far.
        while received < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    char = sock.read(1)
                    if not char:
                        # End of stream: stop instead of spinning forever.
                        break
                    chunk_length += char
            # Drop any chunk extension following the size.
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length.strip(), 16)
            except (TypeError, ValueError):
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                if not chunk:
                    # The stream ended before the announced chunk was complete.
                    raise BadRequest('Incomplete chunked transfer')
                chunk_length -= len(chunk)
                received += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        # TODO: Raise something to note that maximum size is reached.
    else:
        if length > MAX_UPLOAD_SIZE:
            # TODO: Raise something to note that maximum size is reached.
            pass
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                # End of stream before 'length' bytes arrived.
                break
            length -= len(data)
            yield data
386

    
387
class ObjectWrapper(object):
388
    """Return the object's data block-per-block in each iteration.
389
    
390
    Read from the object using the offset and length provided in each entry of the range list.
391
    """
392
    
393
    def __init__(self, ranges, size, hashmap, boundary):
394
        self.ranges = ranges
395
        self.size = size
396
        self.hashmap = hashmap
397
        self.boundary = boundary
398
        
399
        self.block_index = -1
400
        self.block = ''
401
        
402
        self.range_index = -1
403
        self.offset, self.length = self.ranges[0]
404
    
405
    def __iter__(self):
406
        return self
407
    
408
    def part_iterator(self):
409
        if self.length > 0:
410
            # Get the block for the current offset.
411
            bi = int(self.offset / backend.block_size)
412
            if self.block_index != bi:
413
                try:
414
                    self.block = backend.get_block(self.hashmap[bi])
415
                except NameError:
416
                    raise ItemNotFound('Block does not exist')
417
                self.block_index = bi
418
            # Get the data from the block.
419
            bo = self.offset % backend.block_size
420
            bl = min(self.length, backend.block_size - bo)
421
            data = self.block[bo:bo + bl]
422
            self.offset += bl
423
            self.length -= bl
424
            return data
425
        else:
426
            raise StopIteration
427
    
428
    def next(self):
429
        if len(self.ranges) == 1:
430
            return self.part_iterator()
431
        if self.range_index == len(self.ranges):
432
            raise StopIteration
433
        try:
434
            if self.range_index == -1:
435
                raise StopIteration
436
            return self.part_iterator()
437
        except StopIteration:
438
            self.range_index += 1
439
            out = []
440
            if self.range_index < len(self.ranges):
441
                # Part header.
442
                self.offset, self.length = self.ranges[self.range_index]
443
                if self.range_index > 0:
444
                    out.append('')
445
                out.append('--' + self.boundary)
446
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
447
                out.append('Content-Transfer-Encoding: binary')
448
                out.append('')
449
                out.append('')
450
                return '\r\n'.join(out)
451
            else:
452
                # Footer.
453
                out.append('')
454
                out.append('--' + self.boundary + '--')
455
                out.append('')
456
                return '\r\n'.join(out)
457

    
458
def object_data_response(request, size, hashmap, meta, public=False):
    """Build the HttpResponse that streams an object's data.

    Without a usable Range header the whole object is sent with status 200.
    Otherwise the status is 206: a single range gets Content-Length and
    Content-Range headers, several ranges are sent as multipart/byteranges
    with a random boundary. RangeNotSatisfiable is raised when any range
    falls outside the object.
    """

    # Range handling.
    ranges = get_range(request, size)
    if ranges is None:
        ranges = [(0, size)]
        status = 200
    else:
        out_of_bounds = any(
            length <= 0 or length > size or
            offset < 0 or offset >= size or
            offset + length > size
            for offset, length in ranges)
        if out_of_bounds:
            raise RangeNotSatisfiable('Requested range exceeds object limits')
        status = 206

    partial = status == 206
    boundary = uuid.uuid4().hex if partial and len(ranges) > 1 else ''

    wrapper = ObjectWrapper(ranges, size, hashmap, boundary)
    response = HttpResponse(wrapper, status=status)
    put_object_meta(response, meta, public)
    if partial:
        if len(ranges) == 1:
            offset, length = ranges[0]
            response['Content-Length'] = length # Update with the correct length.
            response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
        else:
            # The multipart body's length is not known in advance.
            del(response['Content-Length'])
            response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
    return response
491

    
492
def hashmap_hash(hashmap):
    """Return the hex root hash of a hashmap, computed as a Merkle-like tree.

    An empty hashmap hashes the empty string and a single entry is hashed
    on its own. Otherwise the list is padded with all-zero entries (as long
    as the first entry) up to the next power of two, and adjacent pairs are
    hashed together level by level until one digest is left. The hash
    algorithm is the one named by the backend.
    """

    def digest(data):
        hasher = hashlib.new(backend.hash_algorithm)
        hasher.update(data)
        return hasher.digest()

    count = len(hashmap)
    if count == 0:
        return hexlify(digest(''))
    if count == 1:
        return hexlify(digest(hashmap[0]))

    # Smallest power of two that can hold all entries.
    width = 2
    while width < count:
        width = width * 2
    padding = [('\x00' * len(hashmap[0]))] * (width - count)
    level = hashmap + padding

    # The level always has an even number of entries until it shrinks to one.
    while len(level) > 1:
        level = [digest(level[i] + level[i + 1]) for i in range(0, len(level), 2)]
    return hexlify(level[0])
512

    
513
def update_response_headers(request, response):
    """Set the Content-Type of a response from request.serialization.

    'xml' and 'json' force the matching content type; for anything else a
    plain text type is set only when the response has an empty
    Content-Type. When settings.TEST is on, a Date header is added as well.
    """

    content_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    serialization = request.serialization
    if serialization in content_types:
        response['Content-Type'] = content_types[serialization]
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if settings.TEST:
        response['Date'] = format_date_time(time())
523

    
524
def render_fault(request, fault):
    """Return a plain text HttpResponse describing a fault.

    The body consists of the fault's message and details, one per line, and
    the status is the fault's code. When settings.DEBUG or settings.TEST is
    on, the details are replaced by the traceback of the exception that is
    currently being handled. The request's serialization is reset to 'text'.
    """

    if settings.DEBUG or settings.TEST:
        # format_exc() formats the exception being handled; its only
        # parameter is a frame limit, so the fault must not be passed in.
        fault.details = format_exc()

    request.serialization = 'text'
    data = '\n'.join((fault.message, fault.details)) + '\n'
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
533

    
534
def request_serialization(request, format_allowed=False):
    """Return the serialization format asked for: 'text', 'json' or 'xml'.

    'text' is always returned unless 'format_allowed' is true. Otherwise the
    'format' query parameter is looked at first and then the entries of the
    Accept header, in order; 'text' is the fallback.
    """

    if not format_allowed:
        return 'text'

    requested = request.GET.get('format')
    for known in ('json', 'xml'):
        if requested == known:
            return known

    by_media_type = {
        'application/json': 'json',
        'application/xml': 'xml',
        'text/xml': 'xml',
    }
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        # Ignore any parameters (such as ';q=0.9') after the media type.
        media_type = item.strip().partition(';')[0]
        if media_type in by_media_type:
            return by_media_type[media_type]

    return 'text'
557

    
558
def api_method(http_method=None, format_allowed=False):
    """Decorator function for views that implement an API method.

    The wrapped view is only called when the request method equals
    'http_method' (if one is given) and the container/object names found in
    the positional arguments are within the size limits. Before the call,
    request.serialization and request.user are filled in; after it, the
    response headers are updated.

    A Fault raised along the way is rendered as the response. Any other
    Exception is logged and rendered as a ServiceUnavailable fault.
    Exceptions that do not derive from Exception (such as SystemExit or
    KeyboardInterrupt) are left to propagate.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                # TODO: Authenticate.
                request.user = "test"

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except Exception as e:
                # Let the logging module do the formatting.
                logger.exception('Unexpected error: %s', e)
                fault = ServiceUnavailable('Unexpected error')
                return render_fault(request, fault)
        return wrapper
    return decorator