root / pithos / api / util.py @ 2c22e4ac


# Copyright 2011 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
#   1. Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer.
#
#   2. Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.

from functools import wraps
from time import time
from traceback import format_exc
from wsgiref.handlers import format_date_time
from binascii import hexlify

from django.conf import settings
from django.http import HttpResponse
from django.utils.http import http_date, parse_etags

from pithos.api.compat import parse_http_date_safe
from pithos.api.faults import (Fault, NotModified, BadRequest, ItemNotFound, LengthRequired,
                               PreconditionFailed, ServiceUnavailable)
from pithos.backends import backend

import datetime
import logging
import re
import hashlib


logger = logging.getLogger(__name__)

def printable_meta_dict(d):
    """Format a meta dictionary for printing out json/xml.
    
    Convert all keys to lower case and replace dashes with underscores.
    Change the backend's 'modified' key to 'last_modified' and format the date.
    """
    if 'modified' in d:
        d['last_modified'] = datetime.datetime.fromtimestamp(int(d['modified'])).isoformat()
        del(d['modified'])
    return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])

def format_meta_key(k):
    """Convert underscores to dashes and capitalize intra-dash strings."""
    return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])

def get_meta_prefix(request, prefix):
    """Get all prefix-* request headers in a dict. Reformat keys with format_meta_key()."""
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    return dict([(format_meta_key(k[5:]), v) for k, v in request.META.iteritems() if k.startswith(prefix)])
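
# Illustrative example (hypothetical header values, not part of the original module): a
# request header 'X-Account-Meta-Quota: 100' reaches Django as the META key
# 'HTTP_X_ACCOUNT_META_QUOTA', so, assuming such a request:
#
#     format_meta_key('X_ACCOUNT_META_QUOTA')      # -> 'X-Account-Meta-Quota'
#     get_meta_prefix(request, 'X-Account-Meta-')  # -> {'X-Account-Meta-Quota': '100'}
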
def get_account_meta(request):
    """Get metadata from an account request."""
    meta = get_meta_prefix(request, 'X-Account-Meta-')
    return meta

def put_account_meta(response, meta):
    """Put metadata in an account response."""
    response['X-Account-Container-Count'] = meta['count']
    response['X-Account-Bytes-Used'] = meta['bytes']
    if 'modified' in meta:
        response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
        response[k.encode('utf-8')] = meta[k].encode('utf-8')

def get_container_meta(request):
    """Get metadata from a container request."""
    meta = get_meta_prefix(request, 'X-Container-Meta-')
    return meta

def put_container_meta(response, meta):
    """Put metadata in a container response."""
    response['X-Container-Object-Count'] = meta['count']
    response['X-Container-Bytes-Used'] = meta['bytes']
    response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
        response[k.encode('utf-8')] = meta[k].encode('utf-8')
    response['X-Container-Object-Meta'] = [x[14:] for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
    response['X-Container-Block-Size'] = backend.block_size
    response['X-Container-Block-Hash'] = backend.hash_algorithm

def get_object_meta(request):
    """Get metadata from an object request."""
    meta = get_meta_prefix(request, 'X-Object-Meta-')
    if request.META.get('CONTENT_TYPE'):
        meta['Content-Type'] = request.META['CONTENT_TYPE']
    if request.META.get('HTTP_CONTENT_ENCODING'):
        meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
    if request.META.get('HTTP_CONTENT_DISPOSITION'):
        meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
    if request.META.get('HTTP_X_OBJECT_MANIFEST'):
        meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
    if request.META.get('HTTP_X_OBJECT_PUBLIC'):
        meta['X-Object-Public'] = request.META['HTTP_X_OBJECT_PUBLIC']
    return meta

def put_object_meta(response, meta):
    """Put metadata in an object response."""
    response['ETag'] = meta['hash']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    response['Last-Modified'] = http_date(int(meta['modified']))
    response['X-Object-Version'] = meta['version']
    response['X-Object-Version-Timestamp'] = meta['version_timestamp']
    for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
        response[k.encode('utf-8')] = meta[k].encode('utf-8')
    for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest', 'X-Object-Public'):
        if k in meta:
            response[k] = meta[k]

def validate_modification_preconditions(request, meta):
    """Check that the modified timestamp conforms with the preconditions set."""
    if 'modified' not in meta:
        return # TODO: Always return?
    
    if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if if_modified_since is not None:
        if_modified_since = parse_http_date_safe(if_modified_since)
    if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
        raise NotModified('Resource has not been modified')
    
    if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if if_unmodified_since is not None:
        if_unmodified_since = parse_http_date_safe(if_unmodified_since)
    if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
        raise PreconditionFailed('Resource has been modified')

def validate_matching_preconditions(request, meta):
    """Check that the ETag conforms with the preconditions set."""
    if 'hash' not in meta:
        return # TODO: Always return?
    
    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None and if_match != '*':
        if meta['hash'] not in [x.lower() for x in parse_etags(if_match)]:
            raise PreconditionFailed('Resource Etag does not match')
    
    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is not None:
        if if_none_match == '*' or meta['hash'] in [x.lower() for x in parse_etags(if_none_match)]:
            raise NotModified('Resource Etag matches')

def copy_or_move_object(request, v_account, src_path, dest_path, move=False):
    """Copy or move an object."""
    if type(src_path) == str:
        parts = src_path.split('/')
        if len(parts) < 3 or parts[0] != '':
            raise BadRequest('Invalid X-Copy-From or X-Move-From header')
        src_container = parts[1]
        src_name = '/'.join(parts[2:])
    elif type(src_path) == tuple and len(src_path) == 2:
        src_container, src_name = src_path
    if type(dest_path) == str:
        parts = dest_path.split('/')
        if len(parts) < 3 or parts[0] != '':
            raise BadRequest('Invalid Destination header')
        dest_container = parts[1]
        dest_name = '/'.join(parts[2:])
    elif type(dest_path) == tuple and len(dest_path) == 2:
        dest_container, dest_name = dest_path
    
    meta = get_object_meta(request)
    # Keep previous values of 'Content-Type' (if a new one is absent) and 'hash'.
    try:
        src_meta = backend.get_object_meta(v_account, src_container, src_name)
    except NameError:
        raise ItemNotFound('Container or object does not exist')
    if 'Content-Type' in meta and 'Content-Type' in src_meta:
        del(src_meta['Content-Type'])
    for k in ('Content-Type', 'hash'):
        if k in src_meta:
            meta[k] = src_meta[k]
    
    # TODO: Copy or move with 'versioned' set.
    try:
        if move:
            backend.move_object(v_account, src_container, src_name, dest_container, dest_name, meta, replace_meta=True)
        else:
            backend.copy_object(v_account, src_container, src_name, dest_container, dest_name, meta, replace_meta=True)
    except NameError:
        raise ItemNotFound('Container or object does not exist')
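
# Illustrative example (hypothetical paths, not part of the original module): an
# X-Copy-From value of '/photos/summer/beach.jpg' yields src_container = 'photos' and
# src_name = 'summer/beach.jpg'; a value without a leading '/' or without an object part,
# e.g. 'photos', raises BadRequest.
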
def get_version(request):
    version = request.GET.get('version')
    if version is not None:
        try:
            version = int(version)
        except ValueError:
            return None
        if version < 0:
            return None
    return version

def get_content_length(request):
    content_length = request.META.get('CONTENT_LENGTH')
    if not content_length:
        raise LengthRequired('Missing Content-Length header')
    try:
        content_length = int(content_length)
        if content_length < 0:
            raise ValueError
    except ValueError:
        raise BadRequest('Invalid Content-Length header')
    return content_length

def get_range(request, size):
    """Parse a Range header from the request.
    
    Returns None when the header is missing or should be ignored,
    or a list of (offset, length) tuples that should be checked further.
    """
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None
    
    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
        m = p.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None
        
        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                ret.append((offset, upto - offset + 1))
            else:
                ret.append((offset, size - offset))
        else:
            length = int(upto)
            ret.append((size - length, length))
    
    return ret
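
# Illustrative example (hypothetical values, not part of the original module): for an
# object of size 1000, 'Range: bytes=0-499,-100' yields [(0, 500), (900, 100)], while a
# malformed spec such as 'bytes=-' makes get_range() return None and the header is ignored.
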
def get_content_range(request):
    """Parse a Content-Range header from the request.
    
    Returns None when the header is missing or should be ignored,
    or an (offset, length, total) tuple - check the result, as length and total may be None.
    Returns (None, None, None) if the provided range is '*/*'.
    """
    
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None
    
    p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
    m = p.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None
    offset = int(m.group('offset'))
    upto = m.group('upto')
    total = m.group('total')
    if upto != '':
        upto = int(upto)
    else:
        upto = None
    if total != '*':
        total = int(total)
    else:
        total = None
    if (upto is not None and offset > upto) or \
        (total is not None and offset >= total) or \
        (total is not None and upto is not None and upto >= total):
        return None
    
    if upto is None:
        length = None
    else:
        length = upto - offset + 1
    return (offset, length, total)
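
# Illustrative example (hypothetical values, not part of the original module):
# 'Content-Range: bytes 0-499/1234' parses to (0, 500, 1234), 'bytes 500-/*' parses to
# (500, None, None), and 'bytes */*' parses to (None, None, None).
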
def raw_input_socket(request):
    """Return the socket for reading the rest of the request."""
    server_software = request.META.get('SERVER_SOFTWARE')
    if not server_software:
        if 'wsgi.input' in request.environ:
            return request.environ['wsgi.input']
        raise ServiceUnavailable('Unknown server software')
    if server_software.startswith('WSGIServer'):
        return request.environ['wsgi.input']
    elif server_software.startswith('mod_python'):
        return request._req
    raise ServiceUnavailable('Unknown server software')

MAX_UPLOAD_SIZE = 10 * (1024 * 1024) # 10MB

def socket_read_iterator(sock, length=0, blocksize=4096):
    """Return at most 'blocksize' bytes read from the socket in each iteration.
    
    Read up to 'length' bytes in total. If 'length' is negative, attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.
    """
    if length < 0: # Chunked transfers
        data = ''
        while length < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = ''
                while chunk_length[-1:] != '\n':
                    chunk_length += sock.read(1)
                chunk_length = chunk_length.strip()
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length, 16)
            except Exception, e:
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                chunk_length -= len(chunk)
                length += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        # TODO: Raise something to note that maximum size is reached.
    else:
        if length > MAX_UPLOAD_SIZE:
            # TODO: Raise something to note that maximum size is reached.
            pass
        while length > 0:
            data = sock.read(min(length, blocksize))
            length -= len(data)
            yield data
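
# Illustrative example (hypothetical wire data, not part of the original module): with a
# chunked request body of '4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n',
# socket_read_iterator(sock, -1) yields 'Wikipedia' (split into pieces of at most
# 'blocksize' bytes) and stops at the zero-length chunk.
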
class ObjectWrapper(object):
    """Return the object's data block by block in each iteration.
    
    Read from the object using the offset and length provided in each entry of the range list.
    """
    
    def __init__(self, v_account, v_container, v_object, ranges, size, hashmap, boundary):
        self.v_account = v_account
        self.v_container = v_container
        self.v_object = v_object
        self.ranges = ranges
        self.size = size
        self.hashmap = hashmap
        self.boundary = boundary
        
        self.block_index = -1
        self.block = ''
        
        self.range_index = -1
        self.offset, self.length = self.ranges[0]
    
    def __iter__(self):
        return self
    
    def part_iterator(self):
        if self.length > 0:
            # Get the block for the current offset.
            bi = int(self.offset / backend.block_size)
            if self.block_index != bi:
                try:
                    self.block = backend.get_block(self.hashmap[bi])
                except NameError:
                    raise ItemNotFound('Block does not exist')
                self.block_index = bi
            # Get the data from the block.
            bo = self.offset % backend.block_size
            bl = min(self.length, backend.block_size - bo)
            data = self.block[bo:bo + bl]
            self.offset += bl
            self.length -= bl
            return data
        else:
            raise StopIteration
    
    def next(self):
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            raise StopIteration
        try:
            if self.range_index == -1:
                raise StopIteration
            return self.part_iterator()
        except StopIteration:
            self.range_index += 1
            out = []
            if self.range_index < len(self.ranges):
                # Part header.
                self.offset, self.length = self.ranges[self.range_index]
                if self.range_index > 0:
                    out.append('')
                out.append('--' + self.boundary)
                out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
                out.append('Content-Transfer-Encoding: binary')
                out.append('')
                out.append('')
                return '\r\n'.join(out)
            else:
                # Footer.
                out.append('')
                out.append('--' + self.boundary + '--')
                out.append('')
                return '\r\n'.join(out)
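
# Illustrative usage sketch (assumed calling convention, not taken from the original views):
# a ranged GET handler could stream an object by wrapping its hashmap, e.g.:
#
#     ranges = get_range(request, meta['bytes']) or [(0, meta['bytes'])]
#     wrapper = ObjectWrapper(v_account, v_container, v_object, ranges, meta['bytes'],
#                             hashmap, 'boundary')
#     response = HttpResponse(wrapper, status=206)
#
# With a single range, iteration yields raw block data; with several, next() interleaves
# multipart/byteranges part headers with the data returned by part_iterator().
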
def hashmap_hash(hashmap):
    """Produce the root hash, treating the hashmap as a Merkle-like tree."""
    
    def subhash(d):
        h = hashlib.new(backend.hash_algorithm)
        h.update(d)
        return h.digest()
    
    if len(hashmap) == 0:
        return hexlify(subhash(''))
    if len(hashmap) == 1:
        return hexlify(subhash(hashmap[0]))
    s = 2
    while s < len(hashmap):
        s = s * 2
    h = hashmap + ([('\x00' * len(hashmap[0]))] * (s - len(hashmap)))
    h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
    while len(h) > 1:
        h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
    return hexlify(h[0])
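
# Illustrative example (hypothetical three-entry hashmap [h0, h1, h2], not part of the
# original module): the map is padded with zero bytes to the next power of two and folded
# pairwise with the inner subhash() helper, so the root is
# hexlify(subhash(subhash(h0 + h1) + subhash(h2 + '\x00' * len(h0)))); an empty hashmap
# hashes to hexlify(subhash('')).
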
def update_response_headers(request, response):
    if request.serialization == 'xml':
        response['Content-Type'] = 'application/xml; charset=UTF-8'
    elif request.serialization == 'json':
        response['Content-Type'] = 'application/json; charset=UTF-8'
    elif not response['Content-Type']:
        response['Content-Type'] = 'text/plain; charset=UTF-8'
    
    if settings.TEST:
        response['Date'] = format_date_time(time())

def render_fault(request, fault):
    if settings.DEBUG or settings.TEST:
        fault.details = format_exc(fault)
    
    request.serialization = 'text'
    data = '\n'.join((fault.message, fault.details)) + '\n'
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response

def request_serialization(request, format_allowed=False):
    """Return the serialization format requested.
    
    Valid formats are 'text' and 'json'; 'xml' is also valid if 'format_allowed' is True.
    """
    if not format_allowed:
        return 'text'
    
    format = request.GET.get('format')
    if format == 'json':
        return 'json'
    elif format == 'xml':
        return 'xml'
    
    for item in request.META.get('HTTP_ACCEPT', '').split(','):
        accept, sep, rest = item.strip().partition(';')
        if accept == 'application/json':
            return 'json'
        elif accept == 'application/xml' or accept == 'text/xml':
            return 'xml'
    
    return 'text'
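
# Illustrative example (hypothetical request values, not part of the original module): with
# format_allowed=True, a query string of '?format=json' or an 'Accept: application/json'
# header selects 'json', 'Accept: text/xml' selects 'xml', and anything else falls back to
# 'text'.
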
def api_method(http_method=None, format_allowed=False):
    """Decorator function for views that implement an API method."""
    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')
                
                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')
                
                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                # TODO: Authenticate.
                request.user = "test"
                
                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault, fault:
                return render_fault(request, fault)
            except BaseException, e:
                logger.exception('Unexpected error: %s' % e)
                fault = ServiceUnavailable('Unexpected error')
                return render_fault(request, fault)
        return wrapper
    return decorator
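
# Illustrative usage sketch (hypothetical view, not part of the original module): the
# decorator wraps a Django view, enforces the HTTP method and name-length limits, fills in
# request.serialization and request.user, and turns Fault exceptions into error responses:
#
#     @api_method('GET', format_allowed=True)
#     def demo_view(request, v_account):
#         response = HttpResponse('OK')
#         return response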