Statistics
| Branch: | Tag: | Revision:

root / pithos / api / util.py @ cbfb6636

History | View | Annotate | Download (20 kB)

1
# Copyright 2011 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38
from binascii import hexlify
39

    
40
from django.conf import settings
41
from django.http import HttpResponse
42
from django.utils.http import http_date, parse_etags
43

    
44
from pithos.api.compat import parse_http_date_safe
45
from pithos.api.faults import (Fault, NotModified, BadRequest, ItemNotFound, LengthRequired,
46
                                PreconditionFailed, ServiceUnavailable)
47
from pithos.backends import backend
48

    
49
import datetime
50
import logging
51
import re
52
import hashlib
53

    
54

    
55
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
56

    
57

    
58
def printable_meta_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Change the backend's 'modified' key (a timestamp) to 'last_modified',
    formatted as an ISO 8601 string in local time.

    The dictionary passed in is not altered; a new dictionary is returned.
    """
    # Work on a shallow copy so the caller's dictionary keeps its 'modified' key.
    meta = dict(d)
    if 'modified' in meta:
        modified = meta.pop('modified')
        meta['last_modified'] = datetime.datetime.fromtimestamp(int(modified)).isoformat()
    return dict((k.lower().replace('-', '_'), v) for k, v in meta.items())
68

    
69
def format_meta_key(k):
    """Turn underscores into dashes and capitalize every dash-separated word."""
    words = k.replace('_', '-').split('-')
    capitalized = []
    for word in words:
        capitalized.append(word.capitalize())
    return '-'.join(capitalized)
72

    
73
def get_meta_prefix(request, prefix):
    """Collect all prefix-* request headers in a dict.

    The header prefix is translated to its request.META form ('HTTP_' plus
    the upper-cased prefix with dashes turned into underscores). Each
    matching key has its leading 'HTTP_' removed and is then reformatted
    with format_meta_key().
    """
    wanted = 'HTTP_' + prefix.upper().replace('-', '_')
    found = {}
    for key, value in request.META.items():
        if key.startswith(wanted):
            # key[5:] drops the 'HTTP_' marker.
            found[format_meta_key(key[5:])] = value
    return found
77

    
78
def get_account_meta(request):
    """Return the X-Account-Meta-* headers of an account request as a dict."""
    return get_meta_prefix(request, 'X-Account-Meta-')
82

    
83
def put_account_meta(response, meta):
    """Copy account metadata into the headers of an account response.

    Sets the container count and bytes used, Last-Modified when the meta
    has a 'modified' timestamp, and every 'X-Account-Meta-*' entry
    (key and value encoded as UTF-8).
    """
    response['X-Account-Container-Count'] = meta['count']
    response['X-Account-Bytes-Used'] = meta['bytes']
    if 'modified' in meta:
        response['Last-Modified'] = http_date(int(meta['modified']))
    for name in list(meta.keys()):
        if not name.startswith('X-Account-Meta-'):
            continue
        response[name.encode('utf-8')] = meta[name].encode('utf-8')
91

    
92
def get_container_meta(request):
    """Return the X-Container-Meta-* headers of a container request as a dict."""
    return get_meta_prefix(request, 'X-Container-Meta-')
96

    
97
def put_container_meta(response, meta):
    """Put metadata in a container response.

    Sets the object count and bytes used, Last-Modified when the meta has a
    'modified' timestamp, every 'X-Container-Meta-*' entry (UTF-8 encoded),
    the names of the object meta keys in use, and the backend's block size
    and hash algorithm.
    """
    response['X-Container-Object-Count'] = meta['count']
    response['X-Container-Bytes-Used'] = meta['bytes']
    if 'modified' in meta:
        response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
        response[k.encode('utf-8')] = meta[k].encode('utf-8')
    # A header value has to be a single string: send the key names (with the
    # 14-character 'X-Object-Meta-' prefix removed) as a comma-separated list
    # instead of the repr of a Python list.
    object_meta_keys = [x[14:] for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Object-Meta'] = ','.join(object_meta_keys)
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm
108

    
109
def get_object_meta(request):
    """Build the metadata dict of an object request.

    Starts from the X-Object-Meta-* headers and adds Content-Type,
    Content-Encoding, Content-Disposition and X-Object-Manifest whenever the
    request carries a non-empty value for them.
    """
    meta = get_meta_prefix(request, 'X-Object-Meta-')
    # (request.META key, metadata key) pairs that are copied over verbatim.
    passthrough = (
        ('CONTENT_TYPE', 'Content-Type'),
        ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
        ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'),
    )
    for environ_key, meta_key in passthrough:
        value = request.META.get(environ_key)
        if value:
            meta[meta_key] = value
    return meta
121

    
122
def put_object_meta(response, meta):
    """Copy object metadata into the headers of an object response.

    ETag, Content-Length, Content-Type (defaulting to
    'application/octet-stream') and Last-Modified are always set; the
    'X-Object-Meta-*' entries are added UTF-8 encoded; Content-Encoding,
    Content-Disposition and X-Object-Manifest are added when present.
    """
    response['ETag'] = meta['hash']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))

    user_keys = [name for name in meta.keys() if name.startswith('X-Object-Meta-')]
    for name in user_keys:
        response[name.encode('utf-8')] = meta[name].encode('utf-8')

    for name in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest'):
        if name not in meta:
            continue
        response[name] = meta[name]
133

    
134
def validate_modification_preconditions(request, meta):
    """Check the 'modified' timestamp against the date preconditions of the request.

    Raises NotModified when If-Modified-Since parses to a date that is not
    older than the timestamp, and PreconditionFailed when If-Unmodified-Since
    parses to a date older than the timestamp. Headers that are missing or
    cannot be parsed are ignored.
    """
    if 'modified' not in meta:
        return # TODO: Always return?

    modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if modified_since is not None:
        modified_since = parse_http_date_safe(modified_since)
        if modified_since is not None and int(meta['modified']) <= modified_since:
            raise NotModified('Object has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        unmodified_since = parse_http_date_safe(unmodified_since)
        if unmodified_since is not None and int(meta['modified']) > unmodified_since:
            raise PreconditionFailed('Object has been modified')
150

    
151
def validate_matching_preconditions(request, meta):
    """Check the object's ETag against the If-Match / If-None-Match headers.

    Raises PreconditionFailed when If-Match is given (and is not '*') but
    does not list the object's hash, and NotModified when If-None-Match is
    '*' or lists the object's hash. The listed ETags are lower-cased before
    the comparison.
    """
    if 'hash' not in meta:
        return # TODO: Always return?
    etag = meta['hash']

    match_header = request.META.get('HTTP_IF_MATCH')
    if match_header is not None and match_header != '*':
        accepted = [tag.lower() for tag in parse_etags(match_header)]
        if etag not in accepted:
            raise PreconditionFailed('Object Etag does not match')

    none_match_header = request.META.get('HTTP_IF_NONE_MATCH')
    if none_match_header is None:
        return
    if none_match_header == '*':
        raise NotModified('Object Etag matches')
    rejected = [tag.lower() for tag in parse_etags(none_match_header)]
    if etag in rejected:
        raise NotModified('Object Etag matches')
165

    
166
def _split_object_path(path, error_message):
    """Return (container, name) for a '/container/name' string or a 2-tuple.

    Raises BadRequest(error_message) for any other kind of value.
    """
    if isinstance(path, tuple):
        if len(path) != 2:
            raise BadRequest(error_message)
        return path
    try:
        parts = path.split('/')
    except (AttributeError, TypeError):
        # Not a string-like value.
        raise BadRequest(error_message)
    # A valid path starts with '/' and has at least a container and a name.
    if len(parts) < 3 or parts[0] != '':
        raise BadRequest(error_message)
    return parts[1], '/'.join(parts[2:])


def copy_or_move_object(request, src_path, dest_path, move=False):
    """Copy or move an object.

    'src_path' and 'dest_path' are either '/container/name' strings (byte or
    unicode) or (container, name) tuples. The metadata of the new object is
    taken from the request; the source's 'hash' is always kept and its
    'Content-Type' is kept when the request does not provide one.

    Raises BadRequest when a path is malformed and ItemNotFound when the
    backend reports (through NameError) that a container or object is missing.
    """
    # Malformed paths are reported as BadRequest here; previously any value
    # that was neither a 'str' nor a 2-tuple left the names unbound and was
    # misreported as ItemNotFound further down.
    src_container, src_name = _split_object_path(
        src_path, 'Invalid X-Copy-From or X-Move-From header')
    dest_container, dest_name = _split_object_path(
        dest_path, 'Invalid Destination header')

    meta = get_object_meta(request)
    try:
        src_meta = backend.get_object_meta(request.user, src_container, src_name)
    except NameError:
        raise ItemNotFound('Container or object does not exist')
    # Keep previous values of 'Content-Type' (if a new one is absent) and 'hash'.
    if 'Content-Type' not in meta and 'Content-Type' in src_meta:
        meta['Content-Type'] = src_meta['Content-Type']
    if 'hash' in src_meta:
        meta['hash'] = src_meta['hash']

    try:
        if move:
            backend.move_object(request.user, src_container, src_name, dest_container, dest_name, meta, replace_meta=True)
        else:
            backend.copy_object(request.user, src_container, src_name, dest_container, dest_name, meta, replace_meta=True)
    except NameError:
        raise ItemNotFound('Container or object does not exist')
204

    
205
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises LengthRequired when the header is missing or empty and BadRequest
    when it is not a non-negative integer.
    """
    raw = request.META.get('CONTENT_LENGTH')
    if not raw:
        raise LengthRequired('Missing Content-Length header')
    try:
        value = int(raw)
    except ValueError:
        raise BadRequest('Invalid Content-Length header')
    if value < 0:
        raise BadRequest('Invalid Content-Length header')
    return value
216

    
217
def get_range(request, size):
    """Parse a Range header from the request.

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    'size' is the total size of the entity; it is used to resolve open-ended
    ranges ('N-') and suffix ranges ('-N'). A suffix range longer than the
    entity selects the whole entity.
    """
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compiled once, outside the loop; each spec is '<offset>-<upto>' with
    # either side optional.
    pattern = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ret = []
    for spec in ranges[6:].split(','):
        m = pattern.match(spec.strip())
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset == '':
            # Suffix range: the last 'upto' bytes, clamped to the entity size
            # so that the offset never becomes negative.
            length = min(int(upto), size)
            ret.append((size - length, length))
        elif upto == '':
            # Open-ended range: from 'offset' to the end.
            offset = int(offset)
            ret.append((offset, size - offset))
        else:
            offset = int(offset)
            upto = int(upto)
            if offset > upto:
                return None
            ret.append((offset, upto - offset + 1))

    return ret
252

    
253
def get_content_range(request):
    """Parse a Content-Range header from the request.

    Returns None when the header is absent, malformed or inconsistent;
    otherwise an (offset, length, total) tuple, where length is None for an
    open-ended range and total is None when given as '*'.
    Returns (None, None, None) if the provided range is 'bytes */*'.
    """
    header = request.META.get('HTTP_CONTENT_RANGE', '')
    if not header:
        return None
    # The wildcard form never matches the pattern below, so test it up front.
    if header == 'bytes */*':
        return (None, None, None)

    match = re.match(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$', header)
    if match is None:
        return None

    offset = int(match.group('offset'))
    upto_text = match.group('upto')
    total_text = match.group('total')
    upto = int(upto_text) if upto_text != '' else None
    total = int(total_text) if total_text != '*' else None

    # Reject ranges that run backwards or reach beyond the declared total.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    length = None if upto is None else upto - offset + 1
    return (offset, length, total)
292

    
293
def raw_input_socket(request):
    """Return the object to read the rest of the request body from.

    Uses request.environ['wsgi.input'] for 'WSGIServer' (or when no server
    software is reported but a WSGI input exists) and request._req for
    'mod_python'. Raises ServiceUnavailable for anything else.
    """
    software = request.META.get('SERVER_SOFTWARE')
    if software:
        if software.startswith('WSGIServer'):
            return request.environ['wsgi.input']
        if software.startswith('mod_python'):
            return request._req
    elif 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise ServiceUnavailable('Unknown server software')
305

    
306
MAX_UPLOAD_SIZE = 10 * (1024 * 1024) # 10MB


def socket_read_iterator(sock, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration.

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read in a chunked transfer is controlled by
    MAX_UPLOAD_SIZE.

    Raises BadRequest when a chunk size cannot be parsed or when the input
    ends before all the announced data has been read.
    """
    if length < 0: # Chunked transfers
        data = ''
        received = 0 # Bytes of chunk data read so far.
        while received < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    char = sock.read(1)
                    if not char:
                        # End of input: stop instead of spinning forever.
                        break
                    chunk_length += char
            # Anything after ';' is a chunk extension; int() below tolerates
            # the surrounding whitespace and the trailing CRLF.
            chunk_length = chunk_length.partition(';')[0]
            try:
                chunk_length = int(chunk_length, 16)
            except (TypeError, ValueError):
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                if not chunk:
                    # The input ended in the middle of a chunk.
                    raise BadRequest('Incomplete chunk data')
                chunk_length -= len(chunk)
                received += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        # TODO: Raise something to note that maximum size is reached.
    else:
        if length > MAX_UPLOAD_SIZE:
            # TODO: Raise something to note that maximum size is reached.
            pass
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                # The input ended before 'length' bytes were read.
                raise BadRequest('Incomplete request body')
            length -= len(data)
            yield data
357

    
358
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration.

    Read from the object using the offset and length provided in each entry
    of the range list. With a single range only the data is produced; with
    more ranges every part is preceded by a multipart header (boundary,
    Content-Range, Content-Transfer-Encoding) and a closing boundary is
    produced after the last part.
    """

    def __init__(self, v_account, v_container, v_object, ranges, size, hashmap, boundary):
        self.v_account = v_account
        self.v_container = v_container
        self.v_object = v_object
        self.ranges = ranges
        self.size = size
        self.hashmap = hashmap
        self.boundary = boundary

        # Index and contents of the block fetched last (-1: none yet).
        self.block_index = -1
        self.block = ''

        # Index of the range being served (-1: no part header produced yet).
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of the current range, never crossing a block.

        Raises StopIteration once the current range is exhausted and
        ItemNotFound when the backend reports (through NameError) that the
        block is missing.
        """
        if self.length <= 0:
            raise StopIteration
        # Get the block for the current offset (floor division keeps the
        # index an exact integer).
        bi = self.offset // backend.block_size
        if self.block_index != bi:
            try:
                self.block = backend.get_block(self.hashmap[bi])
            except NameError:
                raise ItemNotFound('Block does not exist')
            self.block_index = bi
        # Get the data from the block.
        bo = self.offset % backend.block_size
        bl = min(self.length, backend.block_size - bo)
        data = self.block[bo:bo + bl]
        self.offset += bl
        self.length -= bl
        return data

    def next(self):
        """Return the next piece of data, part header or footer."""
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            # The footer has already been produced.
            raise StopIteration
        if self.range_index != -1:
            try:
                return self.part_iterator()
            except StopIteration:
                # Current range exhausted; move on to the next part below.
                pass
        self.range_index += 1
        out = []
        if self.range_index < len(self.ranges):
            # Part header.
            self.offset, self.length = self.ranges[self.range_index]
            if self.range_index > 0:
                out.append('')
            out.append('--' + self.boundary)
            out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
            out.append('Content-Transfer-Encoding: binary')
            out.append('')
            out.append('')
        else:
            # Footer.
            out.append('')
            out.append('--' + self.boundary + '--')
            out.append('')
        return '\r\n'.join(out)

    # Python 3 name of the iterator protocol method.
    __next__ = next
431

    
432
def hashmap_hash(hashmap):
    """Produce the root hash, treating the hashmap as a Merkle-like tree.

    An empty hashmap gives the hash of the empty string and a single entry
    gives the hash of that entry. Otherwise the list is padded with
    zero-filled entries up to the next power of two and neighbouring pairs
    are hashed together, level by level, until one hash is left. The result
    is returned hex-encoded.
    """

    def digest(data):
        hasher = hashlib.new(backend.hash_algorithm)
        hasher.update(data)
        return hasher.digest()

    count = len(hashmap)
    if count == 0:
        return hexlify(digest(''))
    if count == 1:
        return hexlify(digest(hashmap[0]))

    # Smallest power of two (at least 2) that holds every entry.
    width = 2
    while width < count:
        width *= 2
    padding = [('\x00' * len(hashmap[0]))] * (width - count)
    level = hashmap + padding
    # Each level has an even number of entries, so every entry has a pair.
    while len(level) > 1:
        level = [digest(level[i] + level[i + 1]) for i in range(0, len(level), 2)]
    return hexlify(level[0])
452

    
453
def update_response_headers(request, response):
    """Set the Content-Type (and, when testing, the Date) of a response.

    The Content-Type follows request.serialization for 'xml' and 'json';
    otherwise it is set to plain text only if the response has none.
    """
    content_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    serialization = request.serialization
    if serialization in content_types:
        response['Content-Type'] = content_types[serialization]
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'

    if settings.TEST:
        response['Date'] = format_date_time(time())
463

    
464
def render_fault(request, fault):
    """Return a plain text response describing the fault.

    The body is the fault's message and details, one per line, and the
    status is the fault's code. When settings.DEBUG or settings.TEST is set,
    the details are replaced by the traceback of the exception currently
    being handled. Forces request.serialization to 'text'.
    """
    if settings.DEBUG or settings.TEST:
        # format_exc() formats the exception being handled; its only
        # positional argument is a depth limit, so the fault must not be
        # passed to it.
        fault.details = format_exc()

    request.serialization = 'text'
    data = '\n'.join((fault.message, fault.details)) + '\n'
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
473

    
474
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.

    Always 'text' unless 'format_allowed' is True. In that case the 'format'
    query parameter ('json' or 'xml') is honoured first, then the first
    recognised media type of the Accept header; 'text' is the fallback.
    """
    if not format_allowed:
        return 'text'

    requested = request.GET.get('format')
    if requested == 'json':
        return 'json'
    if requested == 'xml':
        return 'xml'

    accept_header = request.META.get('HTTP_ACCEPT', '')
    for entry in accept_header.split(','):
        # Ignore media type parameters such as ';q=0.9'.
        media_type = entry.strip().partition(';')[0]
        if media_type == 'application/json':
            return 'json'
        if media_type in ('application/xml', 'text/xml'):
            return 'xml'

    return 'text'
496

    
497
def api_method(http_method=None, format_allowed=False):
    """Decorator function for views that implement an API method.

    The wrapped view is only called when the request method equals
    'http_method' (if given) and the container and object names are within
    256 and 1024 characters respectively. request.serialization and
    request.user are filled in before the call and the response headers are
    updated after it.

    A Fault raised along the way is rendered as the response; any other
    exception is logged and rendered as ServiceUnavailable.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                # TODO: Authenticate.
                request.user = "test"

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except Exception as e:
                # Exception, not BaseException: KeyboardInterrupt and
                # SystemExit must propagate instead of becoming a response.
                logger.exception('Unexpected error: %s', e)
                fault = ServiceUnavailable('Unexpected error')
                return render_fault(request, fault)
        return wrapper
    return decorator