Statistics
| Branch: | Tag: | Revision:

root / pithos / api / util.py @ 5635f9ef

History | View | Annotate | Download (19.1 kB)

1
# Copyright 2011 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
from functools import wraps
35
from time import time
36
from traceback import format_exc
37
from wsgiref.handlers import format_date_time
38

    
39
from django.conf import settings
40
from django.http import HttpResponse
41
from django.utils.http import http_date, parse_etags
42

    
43
from pithos.api.compat import parse_http_date_safe
44
from pithos.api.faults import (Fault, NotModified, BadRequest, ItemNotFound, LengthRequired,
45
                                PreconditionFailed, ServiceUnavailable)
46
from pithos.backends import backend
47

    
48
import datetime
49
import logging
50
import re
51

    
52

    
53
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
54

    
55

    
56
def printable_meta_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Change the 'modified' key from the backend to 'last_modified' and format
    its timestamp as an ISO 8601 string (local time, as given by
    datetime.fromtimestamp()).

    Returns a new dictionary; the dictionary passed in is left untouched.
    """
    # Work on a shallow copy so the caller's metadata is not rewritten.
    meta = dict(d)
    if 'modified' in meta:
        modified = int(meta.pop('modified'))
        meta['last_modified'] = datetime.datetime.fromtimestamp(modified).isoformat()
    # items() works on both Python 2 and Python 3 dictionaries.
    return dict((k.lower().replace('-', '_'), v) for k, v in meta.items())
66

    
67
def format_meta_key(k):
    """Turn underscores into dashes and capitalize every dash-separated word."""
    words = []
    for piece in k.replace('_', '-').split('-'):
        words.append(piece.capitalize())
    return '-'.join(words)
70

    
71
def get_meta_prefix(request, prefix):
    """Get all prefix-* request headers in a dict.

    'prefix' is given in header form (e.g. 'X-Account-Meta-') and is matched
    against the 'HTTP_...' keys of request.META. The keys of the returned
    dict are reformatted with format_meta_key().
    """
    prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    # k[5:] drops the leading 'HTTP_' before the key is reformatted.
    # items() is equivalent to iteritems() here and also exists on Python 3.
    return dict((format_meta_key(k[5:]), v) for k, v in request.META.items() if k.startswith(prefix))
75

    
76
def get_account_meta(request):
    """Collect the X-Account-Meta-* headers of an account request in a dict."""
    return get_meta_prefix(request, 'X-Account-Meta-')
80

    
81
def put_account_meta(response, meta):
    """Copy account metadata into the headers of an account response.

    Sets the container count and bytes used, 'Last-Modified' when the
    metadata has a 'modified' timestamp, and every X-Account-Meta-* entry
    (key and value encoded as UTF-8).
    """
    response['X-Account-Container-Count'] = meta['count']
    response['X-Account-Bytes-Used'] = meta['bytes']
    if 'modified' in meta:
        timestamp = int(meta['modified'])
        response['Last-Modified'] = http_date(timestamp)
    # Snapshot the keys first, then copy only the user-defined entries.
    for key in list(meta.keys()):
        if not key.startswith('X-Account-Meta-'):
            continue
        response[key.encode('utf-8')] = meta[key].encode('utf-8')
89

    
90
def get_container_meta(request):
    """Collect the X-Container-Meta-* headers of a container request in a dict."""
    return get_meta_prefix(request, 'X-Container-Meta-')
94

    
95
def put_container_meta(response, meta):
    """Put metadata in a container response.

    Sets the object count and bytes used, 'Last-Modified' when the metadata
    has a 'modified' timestamp, every X-Container-Meta-* entry (key and
    value encoded as UTF-8) and 'X-Container-Object-Meta'.

    'X-Container-Object-Meta' lists the names found in meta['object_meta']
    with their 'X-Object-Meta-' prefix removed, joined with commas.
    """
    response['X-Container-Object-Count'] = meta['count']
    response['X-Container-Bytes-Used'] = meta['bytes']
    if 'modified' in meta:
        response['Last-Modified'] = http_date(int(meta['modified']))
    for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
        response[k.encode('utf-8')] = meta[k].encode('utf-8')
    prefix = 'X-Object-Meta-'
    # A header value has to be a string; assigning the list itself would put
    # its Python repr on the wire, so join the names instead.
    response['X-Container-Object-Meta'] = ','.join(
        [x[len(prefix):] for x in meta['object_meta'] if x.startswith(prefix)])
104

    
105
def get_object_meta(request):
    """Build the metadata dict of an object request.

    Starts from the X-Object-Meta-* headers and adds the content type,
    content encoding, content disposition and object manifest headers
    whenever the request carries a non-empty value for them.
    """
    meta = get_meta_prefix(request, 'X-Object-Meta-')
    # (metadata key, request.META key) pairs, copied in this order.
    standard_headers = (
        ('Content-Type', 'CONTENT_TYPE'),
        ('Content-Encoding', 'HTTP_CONTENT_ENCODING'),
        ('Content-Disposition', 'HTTP_CONTENT_DISPOSITION'),
        ('X-Object-Manifest', 'HTTP_X_OBJECT_MANIFEST'),
    )
    for name, environ_key in standard_headers:
        value = request.META.get(environ_key)
        if value:
            meta[name] = value
    return meta
117

    
118
def put_object_meta(response, meta):
    """Copy object metadata into the headers of an object response.

    Always sets ETag, Content-Length, Content-Type (falling back to
    'application/octet-stream') and Last-Modified, then the X-Object-Meta-*
    entries (UTF-8 encoded) and, when present, the content encoding,
    content disposition and manifest values.
    """
    response['ETag'] = meta['hash']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
    modified = int(meta['modified'])
    response['Last-Modified'] = http_date(modified)
    # Snapshot the keys first, then copy only the user-defined entries.
    for key in list(meta.keys()):
        if key.startswith('X-Object-Meta-'):
            response[key.encode('utf-8')] = meta[key].encode('utf-8')
    optional_headers = ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest')
    for header in optional_headers:
        if header not in meta:
            continue
        response[header] = meta[header]
129

    
130
def validate_modification_preconditions(request, meta):
    """Check the 'modified' timestamp against the date preconditions.

    Raises NotModified when If-Modified-Since parses to a date that is not
    older than the timestamp, and PreconditionFailed when
    If-Unmodified-Since parses to a date older than the timestamp.
    Headers that are missing or do not parse are ignored.
    """
    if 'modified' not in meta:
        return # TODO: Always return?

    modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if modified_since is not None:
        modified_since = parse_http_date_safe(modified_since)
        if modified_since is not None and int(meta['modified']) <= modified_since:
            raise NotModified('Object has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        unmodified_since = parse_http_date_safe(unmodified_since)
        if unmodified_since is not None and int(meta['modified']) > unmodified_since:
            raise PreconditionFailed('Object has been modified')
146

    
147
def validate_matching_preconditions(request, meta):
    """Check the object's 'hash' against the ETag preconditions.

    Raises PreconditionFailed when If-Match names ETags and none of them
    (lower-cased) equals the hash, and NotModified when If-None-Match is
    '*' or names the hash.
    """
    if 'hash' not in meta:
        return # TODO: Always return?
    etag = meta['hash']

    if_match = request.META.get('HTTP_IF_MATCH')
    if if_match is not None and if_match != '*':
        accepted = [tag.lower() for tag in parse_etags(if_match)]
        if etag not in accepted:
            raise PreconditionFailed('Object Etag does not match')

    if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
    if if_none_match is None:
        return
    if if_none_match != '*':
        rejected = [tag.lower() for tag in parse_etags(if_none_match)]
        if etag not in rejected:
            return
    raise NotModified('Object Etag matches')
161

    
162
def copy_or_move_object(request, src_path, dest_path, move=False):
    """Copy or move an object.

    'src_path' and 'dest_path' are either '/container/object' strings or
    (container, object) tuples. The metadata of the new object comes from
    the request headers; the source's 'hash' is always carried over and its
    'Content-Type' is carried over unless the request supplies a new one.

    Raises BadRequest for a malformed source or destination and
    ItemNotFound when the backend reports (by raising NameError) that a
    container or object does not exist.
    """
    src_container, src_name = _split_object_path(
        src_path, 'Invalid X-Copy-From or X-Move-From header')
    dest_container, dest_name = _split_object_path(
        dest_path, 'Invalid Destination header')

    meta = get_object_meta(request)
    # Keep previous values of 'Content-Type' (if a new one is absent) and 'hash'.
    try:
        src_meta = backend.get_object_meta(request.user, src_container, src_name)
    except NameError:
        raise ItemNotFound('Container or object does not exist')
    if 'Content-Type' in meta and 'Content-Type' in src_meta:
        del(src_meta['Content-Type'])
    for k in ('Content-Type', 'hash'):
        if k in src_meta:
            meta[k] = src_meta[k]

    try:
        if move:
            backend.move_object(request.user, src_container, src_name, dest_container, dest_name, meta, replace_meta=True)
        else:
            backend.copy_object(request.user, src_container, src_name, dest_container, dest_name, meta, replace_meta=True)
    except NameError:
        raise ItemNotFound('Container or object does not exist')


def _split_object_path(path, error_message):
    """Return (container, name) for a '/container/name' string or a 2-tuple.

    Raises BadRequest(error_message) for anything else, instead of leaving
    the caller with unbound names.
    """
    if isinstance(path, tuple):
        if len(path) != 2:
            raise BadRequest(error_message)
        return path
    try:
        # Duck-typed so that both byte and unicode strings are accepted.
        parts = path.split('/')
    except (AttributeError, TypeError):
        raise BadRequest(error_message)
    if len(parts) < 3 or parts[0] != '':
        raise BadRequest(error_message)
    # The object name may itself contain slashes.
    return parts[1], '/'.join(parts[2:])
200

    
201
def get_content_length(request):
    """Return the request's Content-Length as a non-negative integer.

    Raises LengthRequired when the header is missing or empty and
    BadRequest when it is not a valid non-negative integer.
    """
    raw_length = request.META.get('CONTENT_LENGTH')
    if not raw_length:
        raise LengthRequired('Missing Content-Length header')
    try:
        parsed = int(raw_length)
    except ValueError:
        # Treated exactly like a negative length below.
        parsed = -1
    if parsed < 0:
        raise BadRequest('Invalid Content-Length header')
    return parsed
212

    
213
def get_range(request, size):
    """Parse a Range header from the request

    Either returns None, when the header is not existent or should be ignored,
    or a list of (offset, length) tuples - should be further checked.

    'size' is the total size of the object; it is used to resolve open-ended
    ('N-') and suffix ('-N') ranges. The tuples are not clamped to 'size'.
    """
    ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not ranges.startswith('bytes='):
        return None

    # Compiled once for all the range specs of the header.
    p = re.compile(r'^(?P<offset>\d*)-(?P<upto>\d*)$')
    ret = []
    for r in (x.strip() for x in ranges[6:].split(',')):
        m = p.match(r)
        if not m:
            return None
        offset = m.group('offset')
        upto = m.group('upto')
        if offset == '' and upto == '':
            return None

        if offset != '':
            offset = int(offset)
            if upto != '':
                upto = int(upto)
                if offset > upto:
                    return None
                # Both ends are inclusive.
                ret.append((offset, upto - offset + 1))
            else:
                # 'N-': from offset N to the end of the object.
                ret.append((offset, size - offset))
        else:
            # '-N': the last N bytes of the object.
            length = int(upto)
            ret.append((size - length, length))

    return ret
248

    
249
def get_content_range(request):
    """Parse a Content-Range header from the request

    Either returns None, when the header is not existent or should be ignored,
    or an (offset, length, total) tuple - check as length, total may be None.
    Returns (None, None, None) if the provided range is '*/*'.

    A header whose last position precedes its offset, or whose positions do
    not fit inside the declared total, is ignored (None is returned).
    """
    ranges = request.META.get('HTTP_CONTENT_RANGE', '')
    if not ranges:
        return None

    p = re.compile(r'^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
    m = p.match(ranges)
    if not m:
        if ranges == 'bytes */*':
            return (None, None, None)
        return None

    offset = int(m.group('offset'))
    upto = m.group('upto')
    total = m.group('total')
    upto = int(upto) if upto != '' else None
    total = int(total) if total != '*' else None

    # Compare against None explicitly: 0 is a legitimate last-byte position
    # (and a possible total), so a truthiness test would skip these checks.
    if upto is not None and offset > upto:
        return None
    if total is not None:
        if offset >= total:
            return None
        if upto is not None and upto >= total:
            return None

    if upto is None:
        length = None
    else:
        # Both ends are inclusive.
        length = upto - offset + 1
    return (offset, length, total)
288

    
289
def raw_input_socket(request):
    """Return the object to read the rest of the request body from.

    This is the 'wsgi.input' stream when the server software is a
    'WSGIServer' (or is not announced but the stream is in the environ), and
    the request's '_req' attribute under 'mod_python'. Any other case
    raises ServiceUnavailable.
    """
    software = request.META.get('SERVER_SOFTWARE')
    if software:
        if software.startswith('WSGIServer'):
            return request.environ['wsgi.input']
        if software.startswith('mod_python'):
            return request._req
    elif 'wsgi.input' in request.environ:
        # No server announced: fall back to the WSGI stream if there is one.
        return request.environ['wsgi.input']
    raise ServiceUnavailable('Unknown server software')
301

    
302
MAX_UPLOAD_SIZE = 10 * (1024 * 1024) # 10MB


def socket_read_iterator(sock, length=0, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration

    Read up to 'length'. If 'length' is negative, will attempt a chunked read.
    The maximum amount of data read is controlled by MAX_UPLOAD_SIZE.

    'sock' needs a read(n) method; readline() is used for the chunk size
    lines when available. Raises BadRequest on a malformed chunk size or
    when the stream ends before the announced data has been read.
    """
    if length < 0: # Chunked transfers
        data = b''
        while length < MAX_UPLOAD_SIZE:
            # Get chunk size.
            if hasattr(sock, 'readline'):
                chunk_length = sock.readline()
            else:
                chunk_length = b''
                while chunk_length[-1:] != b'\n':
                    byte = sock.read(1)
                    if not byte:
                        # EOF in the middle of the size line; never loop forever.
                        raise BadRequest('Unexpected end of chunked data')
                    chunk_length += byte
            # Drop any chunk extension that follows the size.
            chunk_length = chunk_length.split(b';', 1)[0]
            try:
                # int() ignores the surrounding whitespace, CRLF included.
                chunk_length = int(chunk_length, 16)
            except ValueError:
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            if chunk_length < 0:
                raise BadRequest('Bad chunk size')
            # Check if done.
            if chunk_length == 0:
                if len(data) > 0:
                    yield data
                return
            # Get the actual data.
            while chunk_length > 0:
                chunk = sock.read(min(chunk_length, blocksize))
                if not chunk:
                    raise BadRequest('Unexpected end of chunked data')
                chunk_length -= len(chunk)
                length += len(chunk)
                data += chunk
                if len(data) >= blocksize:
                    ret = data[:blocksize]
                    data = data[blocksize:]
                    yield ret
            sock.read(2) # CRLF
        # TODO: Raise something to note that maximum size is reached.
    else:
        if length > MAX_UPLOAD_SIZE:
            # TODO: Raise something to note that maximum size is reached.
            pass
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                # The body is shorter than announced; never loop forever.
                raise BadRequest('Unexpected end of data')
            length -= len(data)
            yield data
353

    
354
class ObjectWrapper(object):
    """Return the object's data block-per-block in each iteration

    Read from the object using the offset and length provided in each entry of the range list.

    With a single range only the data is returned. With more ranges each
    range is preceded by a part header ('--boundary', Content-Range,
    Content-Transfer-Encoding) and the last one is followed by a
    '--boundary--' footer.
    """

    def __init__(self, v_account, v_container, v_object, ranges, size, hashmap, boundary):
        self.v_account = v_account
        self.v_container = v_container
        self.v_object = v_object
        self.ranges = ranges
        self.size = size
        self.hashmap = hashmap
        self.boundary = boundary

        # Index and contents of the last block fetched from the backend.
        self.block_index = -1
        self.block = ''

        # -1 means that no part header has been emitted yet.
        self.range_index = -1
        self.offset, self.length = self.ranges[0]

    def __iter__(self):
        return self

    def part_iterator(self):
        """Return the next piece of the current range.

        A piece never crosses a block boundary. Raises StopIteration when
        the current range is exhausted and ItemNotFound when the backend
        raises NameError for the block.
        """
        if self.length <= 0:
            raise StopIteration
        # Get the block for the current offset.
        bi = self.offset // backend.block_size
        if self.block_index != bi:
            try:
                self.block = backend.get_block(self.hashmap[bi])
            except NameError:
                raise ItemNotFound('Block does not exist')
            self.block_index = bi
        # Get the data from the block.
        bo = self.offset % backend.block_size
        bl = min(self.length, backend.block_size - bo)
        data = self.block[bo:bo + bl]
        self.offset += bl
        self.length -= bl
        return data

    def next(self):
        """Return the next piece of output: data, a part header or the footer."""
        if len(self.ranges) == 1:
            return self.part_iterator()
        if self.range_index == len(self.ranges):
            # The footer has already been returned.
            raise StopIteration
        if self.range_index != -1:
            try:
                return self.part_iterator()
            except StopIteration:
                # Current range is exhausted; move on to the next part.
                pass
        self.range_index += 1
        out = []
        if self.range_index < len(self.ranges):
            # Part header.
            self.offset, self.length = self.ranges[self.range_index]
            if self.range_index > 0:
                out.append('')
            out.append('--' + self.boundary)
            out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
            out.append('Content-Transfer-Encoding: binary')
            out.append('')
            out.append('')
            return '\r\n'.join(out)
        # Footer.
        out.append('')
        out.append('--' + self.boundary + '--')
        out.append('')
        return '\r\n'.join(out)

    # Python 3 spelling of the iterator protocol.
    __next__ = next
427

    
428
def update_response_headers(request, response):
    """Set the Content-Type (and, in test mode, the Date) of a response.

    The content type follows request.serialization for 'xml' and 'json';
    otherwise 'text/plain' is used, but only if the response has no content
    type yet. A Date header is added when settings.TEST is set.
    """
    serialized_types = (
        ('xml', 'application/xml; charset=UTF-8'),
        ('json', 'application/json; charset=UTF-8'),
    )
    for name, content_type in serialized_types:
        if request.serialization == name:
            response['Content-Type'] = content_type
            break
    else:
        # Neither xml nor json: only fill in a missing content type.
        if not response['Content-Type']:
            response['Content-Type'] = 'text/plain; charset=UTF-8'

    if settings.TEST:
        response['Date'] = format_date_time(time())
438

    
439
def render_fault(request, fault):
    """Return a plain text response that describes a fault.

    The body holds the fault's message and details and the status is the
    fault's code. When settings.DEBUG or settings.TEST is set, the details
    are replaced by the traceback of the exception being handled.
    """
    if settings.DEBUG or settings.TEST:
        # format_exc() formats the exception currently being handled; its
        # optional argument is a traceback depth limit, not the exception.
        fault.details = format_exc()

    request.serialization = 'text'
    data = '\n'.join((fault.message, fault.details)) + '\n'
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
448

    
449
def request_serialization(request, format_allowed=False):
    """Return the serialization format requested

    The result is always 'text' unless 'format_allowed' is True. In that
    case the 'format' query parameter ('json' or 'xml') is honoured first,
    then the first JSON or XML media type in the Accept header; 'text' is
    the fallback.
    """
    if not format_allowed:
        return 'text'

    requested = request.GET.get('format')
    for name in ('json', 'xml'):
        if requested == name:
            return name

    accept_header = request.META.get('HTTP_ACCEPT', '')
    for entry in accept_header.split(','):
        # Ignore parameters such as ';q=0.9'.
        media_type = entry.strip().partition(';')[0]
        if media_type == 'application/json':
            return 'json'
        if media_type in ('application/xml', 'text/xml'):
            return 'xml'

    return 'text'
471

    
472
def api_method(http_method=None, format_allowed=False):
    """Decorator function for views that implement an API method

    The wrapped view is called only if the request uses 'http_method' (when
    one is given) and the container and object names are not too long.
    request.serialization and request.user are filled in before the call and
    the response headers are updated after it.

    A Fault raised on the way is rendered as the response. Any other
    exception is logged and rendered as a ServiceUnavailable fault.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            try:
                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                # The args variable may contain up to (account, container, object).
                if len(args) > 1 and len(args[1]) > 256:
                    raise BadRequest('Container name too large.')
                if len(args) > 2 and len(args[2]) > 1024:
                    raise BadRequest('Object name too large.')

                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)
                # TODO: Authenticate.
                request.user = "test"

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except Exception as e:
                # Exception rather than BaseException, so that SystemExit and
                # KeyboardInterrupt are not turned into an error response.
                logger.exception('Unexpected error: %s', e)
                fault = ServiceUnavailable('Unexpected error')
                return render_fault(request, fault)
        return wrapper
    return decorator