1 # Copyright 2011 GRNET S.A. All rights reserved.
3 # Redistribution and use in source and binary forms, with or
4 # without modification, are permitted provided that the following
7 # 1. Redistributions of source code must retain the above
8 # copyright notice, this list of conditions and the following
11 # 2. Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following
13 # disclaimer in the documentation and/or other materials
14 # provided with the distribution.
16 # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24 # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 # POSSIBILITY OF SUCH DAMAGE.
29 # The views and conclusions contained in the software and
30 # documentation are those of the authors and should not be
31 # interpreted as representing official policies, either expressed
32 # or implied, of GRNET S.A.
34 from functools import wraps
36 from traceback import format_exc
37 from wsgiref.handlers import format_date_time
38 from binascii import hexlify, unhexlify
39 from datetime import datetime, tzinfo, timedelta
41 from django.conf import settings
42 from django.http import HttpResponse
43 from django.utils import simplejson as json
44 from django.utils.http import http_date, parse_etags
45 from django.utils.encoding import smart_str
47 from pithos.api.compat import parse_http_date_safe, parse_http_date
48 from pithos.api.faults import (Fault, NotModified, BadRequest, Unauthorized, ItemNotFound,
49 Conflict, LengthRequired, PreconditionFailed, RangeNotSatisfiable,
51 from pithos.backends import connect_backend
52 from pithos.backends.base import NotAllowedError
60 logger = logging.getLogger(__name__)
64 def utcoffset(self, dt):
74 """Return an ISO8601 date string that includes a timezone."""
76 return d.replace(tzinfo=UTC()).isoformat()
78 def rename_meta_key(d, old, new):
def printable_header_dict(d):
    """Prepare a meta dictionary for json/xml output.

    The 'last_modified' entry of d is overwritten in place with its
    isoformat() rendering (the stored value is converted with int() and
    datetime.fromtimestamp() first), so d must contain that key.

    Returns a new dict holding the entries of d with every key lower-cased
    and its dashes turned into underscores.
    """
    timestamp = int(d['last_modified'])
    d['last_modified'] = isoformat(datetime.fromtimestamp(timestamp))

    printable = {}
    for key, value in d.iteritems():
        printable[key.lower().replace('-', '_')] = value
    return printable
def format_header_key(k):
    """Return k as a dash-separated name with every part capitalized.

    Underscores are treated as dashes, so 'x_object_meta' and
    'X-OBJECT-META' both become 'X-Object-Meta'.
    """
    capitalized = []
    for part in k.replace('_', '-').split('-'):
        capitalized.append(part.capitalize())
    return '-'.join(capitalized)
def get_header_prefix(request, prefix):
    """Return a dict of the request headers whose name starts with prefix.

    The prefix is matched against request.META, so it is upper-cased, its
    dashes become underscores and 'HTTP_' is prepended. A key that is equal
    to the prefix (nothing after it) is ignored. The returned keys have the
    leading 'HTTP_' dropped and are reformatted with format_header_key().
    """
    meta_prefix = 'HTTP_' + prefix.upper().replace('-', '_')
    headers = {}
    for name, value in request.META.iteritems():
        if not name.startswith(meta_prefix) or len(name) <= len(meta_prefix):
            continue
        # TODO: Document or remove '~' replacing.
        headers[format_header_key(name[5:])] = value.replace('~', '')
    return headers
105 def get_account_headers(request):
106 meta = get_header_prefix(request, 'X-Account-Meta-')
108 for k, v in get_header_prefix(request, 'X-Account-Group-').iteritems():
110 if '-' in n or '_' in n:
111 raise BadRequest('Bad characters in group name')
112 groups[n] = v.replace(' ', '').split(',')
117 def put_account_headers(response, meta, groups):
119 response['X-Account-Container-Count'] = meta['count']
121 response['X-Account-Bytes-Used'] = meta['bytes']
122 response['Last-Modified'] = http_date(int(meta['modified']))
123 for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
124 response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
125 if 'until_timestamp' in meta:
126 response['X-Account-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
127 for k, v in groups.iteritems():
128 k = smart_str(k, strings_only=True)
129 k = format_header_key('X-Account-Group-' + k)
130 v = smart_str(','.join(v), strings_only=True)
133 def get_container_headers(request):
134 meta = get_header_prefix(request, 'X-Container-Meta-')
135 policy = dict([(k[19:].lower(), v.replace(' ', '')) for k, v in get_header_prefix(request, 'X-Container-Policy-').iteritems()])
138 def put_container_headers(request, response, meta, policy):
140 response['X-Container-Object-Count'] = meta['count']
142 response['X-Container-Bytes-Used'] = meta['bytes']
143 response['Last-Modified'] = http_date(int(meta['modified']))
144 for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
145 response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
146 l = [smart_str(x, strings_only=True) for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
147 response['X-Container-Object-Meta'] = ','.join([x[14:] for x in l])
148 response['X-Container-Block-Size'] = request.backend.block_size
149 response['X-Container-Block-Hash'] = request.backend.hash_algorithm
150 if 'until_timestamp' in meta:
151 response['X-Container-Until-Timestamp'] = http_date(int(meta['until_timestamp']))
152 for k, v in policy.iteritems():
153 response[smart_str(format_header_key('X-Container-Policy-' + k), strings_only=True)] = smart_str(v, strings_only=True)
def get_object_headers(request):
    """Gather the object-related information carried by the request headers.

    Returns a (meta, sharing, public) tuple. meta holds the X-Object-Meta-*
    headers plus any of Content-Type, Content-Encoding, Content-Disposition
    and X-Object-Manifest that arrived with a non-empty value. The other two
    items are whatever get_sharing() and get_public() return for the request.
    """
    meta = get_header_prefix(request, 'X-Object-Meta-')

    # (request.META key, name stored in meta) for the standard headers.
    standard_headers = (
        ('CONTENT_TYPE', 'Content-Type'),
        ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
        ('HTTP_CONTENT_DISPOSITION', 'Content-Disposition'),
        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'),
    )
    for environ_key, header_name in standard_headers:
        value = request.META.get(environ_key)
        if value:
            meta[header_name] = value

    return meta, get_sharing(request), get_public(request)
167 def put_object_headers(response, meta, restricted=False):
168 response['ETag'] = meta['hash']
169 response['Content-Length'] = meta['bytes']
170 response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
171 response['Last-Modified'] = http_date(int(meta['modified']))
173 response['X-Object-Modified-By'] = smart_str(meta['modified_by'], strings_only=True)
174 response['X-Object-Version'] = meta['version']
175 response['X-Object-Version-Timestamp'] = http_date(int(meta['version_timestamp']))
176 for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
177 response[smart_str(k, strings_only=True)] = smart_str(meta[k], strings_only=True)
178 for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest',
179 'X-Object-Sharing', 'X-Object-Shared-By', 'X-Object-Allowed-To',
182 response[k] = smart_str(meta[k], strings_only=True)
184 for k in ('Content-Encoding', 'Content-Disposition'):
186 response[k] = meta[k]
188 def update_manifest_meta(request, v_account, meta):
189 """Update metadata if the object has an X-Object-Manifest."""
191 if 'X-Object-Manifest' in meta:
195 src_container, src_name = split_container_object_string('/' + meta['X-Object-Manifest'])
196 objects = request.backend.list_objects(request.user, v_account,
197 src_container, prefix=src_name, virtual=False)
199 src_meta = request.backend.get_object_meta(request.user,
200 v_account, src_container, x[0], x[1])
201 hash += src_meta['hash']
202 bytes += src_meta['bytes']
206 meta['bytes'] = bytes
209 meta['hash'] = md5.hexdigest().lower()
211 def update_sharing_meta(request, permissions, v_account, v_container, v_object, meta):
212 if permissions is None:
214 allowed, perm_path, perms = permissions
218 r = ','.join(perms.get('read', []))
220 ret.append('read=' + r)
221 w = ','.join(perms.get('write', []))
223 ret.append('write=' + w)
224 meta['X-Object-Sharing'] = '; '.join(ret)
225 if '/'.join((v_account, v_container, v_object)) != perm_path:
226 meta['X-Object-Shared-By'] = perm_path
227 if request.user != v_account:
228 meta['X-Object-Allowed-To'] = allowed
230 def update_public_meta(public, meta):
233 meta['X-Object-Public'] = public
def validate_modification_preconditions(request, meta):
    """Enforce the If-Modified-Since / If-Unmodified-Since request headers.

    Nothing is checked when meta has no 'modified' entry. A header whose
    value parse_http_date_safe() turns into None is ignored.

    Raises NotModified when meta['modified'] is not later than the
    If-Modified-Since date, and PreconditionFailed when it is later than
    the If-Unmodified-Since date.
    """
    if 'modified' not in meta:
        return # TODO: Always return?

    modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if modified_since is not None:
        threshold = parse_http_date_safe(modified_since)
        if threshold is not None and int(meta['modified']) <= threshold:
            raise NotModified('Resource has not been modified')

    unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if unmodified_since is not None:
        threshold = parse_http_date_safe(unmodified_since)
        if threshold is not None and int(meta['modified']) > threshold:
            raise PreconditionFailed('Resource has been modified')
253 def validate_matching_preconditions(request, meta):
254 """Check that the ETag conforms with the preconditions set."""
256 hash = meta.get('hash', None)
258 if_match = request.META.get('HTTP_IF_MATCH')
259 if if_match is not None:
261 raise PreconditionFailed('Resource does not exist')
262 if if_match != '*' and hash not in [x.lower() for x in parse_etags(if_match)]:
263 raise PreconditionFailed('Resource ETag does not match')
265 if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
266 if if_none_match is not None:
267 # TODO: If this passes, must ignore If-Modified-Since header.
269 if if_none_match == '*' or hash in [x.lower() for x in parse_etags(if_none_match)]:
270 # TODO: Continue if an If-Modified-Since header is present.
271 if request.method in ('HEAD', 'GET'):
272 raise NotModified('Resource ETag matches')
273 raise PreconditionFailed('Resource exists or ETag matches')
275 def split_container_object_string(s):
276 if not len(s) > 0 or s[0] != '/':
282 return s[:pos], s[(pos + 1):]
284 def copy_or_move_object(request, v_account, src_container, src_name, dest_container, dest_name, move=False):
285 """Copy or move an object."""
287 meta, permissions, public = get_object_headers(request)
288 src_version = request.META.get('HTTP_X_SOURCE_VERSION')
291 version_id = request.backend.move_object(request.user, v_account,
292 src_container, src_name, dest_container, dest_name,
293 meta, False, permissions)
295 version_id = request.backend.copy_object(request.user, v_account,
296 src_container, src_name, dest_container, dest_name,
297 meta, False, permissions, src_version)
298 except NotAllowedError:
299 raise Unauthorized('Access denied')
300 except (NameError, IndexError):
301 raise ItemNotFound('Container or object does not exist')
303 raise BadRequest('Invalid sharing header')
304 except AttributeError, e:
305 raise Conflict(json.dumps(e.data))
306 if public is not None:
308 request.backend.update_object_public(request.user, v_account,
309 dest_container, dest_name, public)
310 except NotAllowedError:
311 raise Unauthorized('Access denied')
313 raise ItemNotFound('Object does not exist')
316 def get_int_parameter(p):
def get_content_length(request):
    """Return the request's Content-Length as parsed by get_int_parameter().

    Raises LengthRequired when get_int_parameter() yields None for the
    CONTENT_LENGTH entry of request.META (presumably because the header is
    absent or not a valid number - confirm against get_int_parameter()).
    """
    raw_length = request.META.get('CONTENT_LENGTH')
    length = get_int_parameter(raw_length)
    if length is not None:
        return length
    raise LengthRequired('Missing or invalid Content-Length header')
332 def get_range(request, size):
333 """Parse a Range header from the request.
335 Either returns None, when the header is not existent or should be ignored,
336 or a list of (offset, length) tuples - should be further checked.
339 ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
340 if not ranges.startswith('bytes='):
344 for r in (x.strip() for x in ranges[6:].split(',')):
345 p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
349 offset = m.group('offset')
350 upto = m.group('upto')
351 if offset == '' and upto == '':
360 ret.append((offset, upto - offset + 1))
362 ret.append((offset, size - offset))
365 ret.append((size - length, length))
369 def get_content_range(request):
370 """Parse a Content-Range header from the request.
372 Either returns None, when the header is not existent or should be ignored,
373 or an (offset, length, total) tuple - check as length, total may be None.
374 Returns (None, None, None) if the provided range is '*/*'.
377 ranges = request.META.get('HTTP_CONTENT_RANGE', '')
381 p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
384 if ranges == 'bytes */*':
385 return (None, None, None)
387 offset = int(m.group('offset'))
388 upto = m.group('upto')
389 total = m.group('total')
398 if (upto is not None and offset > upto) or \
399 (total is not None and offset >= total) or \
400 (total is not None and upto is not None and upto >= total):
406 length = upto - offset + 1
407 return (offset, length, total)
409 def get_sharing(request):
410 """Parse an X-Object-Sharing header from the request.
412 Raises BadRequest on error.
415 permissions = request.META.get('HTTP_X_OBJECT_SHARING')
416 if permissions is None:
419 # TODO: Document or remove '~' replacing.
420 permissions = permissions.replace('~', '')
423 permissions = permissions.replace(' ', '')
424 if permissions == '':
426 for perm in (x for x in permissions.split(';')):
427 if perm.startswith('read='):
428 ret['read'] = list(set([v.replace(' ','').lower() for v in perm[5:].split(',')]))
429 if '' in ret['read']:
430 ret['read'].remove('')
431 if '*' in ret['read']:
433 if len(ret['read']) == 0:
434 raise BadRequest('Bad X-Object-Sharing header value')
435 elif perm.startswith('write='):
436 ret['write'] = list(set([v.replace(' ','').lower() for v in perm[6:].split(',')]))
437 if '' in ret['write']:
438 ret['write'].remove('')
439 if '*' in ret['write']:
441 if len(ret['write']) == 0:
442 raise BadRequest('Bad X-Object-Sharing header value')
444 raise BadRequest('Bad X-Object-Sharing header value')
446 # Keep duplicates only in write list.
447 dups = [x for x in ret.get('read', []) if x in ret.get('write', []) and x != '*']
450 ret['read'].remove(x)
451 if len(ret['read']) == 0:
456 def get_public(request):
457 """Parse an X-Object-Public header from the request.
459 Raises BadRequest on error.
462 public = request.META.get('HTTP_X_OBJECT_PUBLIC')
466 public = public.replace(' ', '').lower()
469 elif public == 'false' or public == '':
471 raise BadRequest('Bad X-Object-Public header value')
473 def raw_input_socket(request):
474 """Return the socket for reading the rest of the request."""
476 server_software = request.META.get('SERVER_SOFTWARE')
477 if server_software and server_software.startswith('mod_python'):
479 if 'wsgi.input' in request.environ:
480 return request.environ['wsgi.input']
481 raise ServiceUnavailable('Unknown server software')
483 MAX_UPLOAD_SIZE = 5 * (1024 * 1024 * 1024) # 5GB
485 def socket_read_iterator(request, length=0, blocksize=4096):
486 """Return a maximum of blocksize data read from the socket in each iteration.
488 Read up to 'length'. If 'length' is negative, will attempt a chunked read.
489 The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
492 sock = raw_input_socket(request)
493 if length < 0: # Chunked transfers
494 # Small version (server does the dechunking).
495 if request.environ.get('mod_wsgi.input_chunked', None):
496 while length < MAX_UPLOAD_SIZE:
497 data = sock.read(blocksize)
501 raise BadRequest('Maximum size is reached')
503 # Long version (do the dechunking).
505 while length < MAX_UPLOAD_SIZE:
507 if hasattr(sock, 'readline'):
508 chunk_length = sock.readline()
511 while chunk_length[-1:] != '\n':
512 chunk_length += sock.read(1)
514 pos = chunk_length.find(';')
516 chunk_length = chunk_length[:pos]
518 chunk_length = int(chunk_length, 16)
520 raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
522 if chunk_length == 0:
526 # Get the actual data.
527 while chunk_length > 0:
528 chunk = sock.read(min(chunk_length, blocksize))
529 chunk_length -= len(chunk)
533 if len(data) >= blocksize:
534 ret = data[:blocksize]
535 data = data[blocksize:]
538 raise BadRequest('Maximum size is reached')
540 if length > MAX_UPLOAD_SIZE:
541 raise BadRequest('Maximum size is reached')
543 data = sock.read(min(length, blocksize))
549 class ObjectWrapper(object):
550 """Return the object's data block-per-block in each iteration.
552 Read from the object using the offset and length provided in each entry of the range list.
555 def __init__(self, backend, ranges, sizes, hashmaps, boundary):
556 self.backend = backend
559 self.hashmaps = hashmaps
560 self.boundary = boundary
561 self.size = sum(self.sizes)
568 self.range_index = -1
569 self.offset, self.length = self.ranges[0]
574 def part_iterator(self):
576 # Get the file for the current offset.
577 file_size = self.sizes[self.file_index]
578 while self.offset >= file_size:
579 self.offset -= file_size
581 file_size = self.sizes[self.file_index]
583 # Get the block for the current position.
584 self.block_index = int(self.offset / self.backend.block_size)
585 if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
586 self.block_hash = self.hashmaps[self.file_index][self.block_index]
588 self.block = self.backend.get_block(self.block_hash)
590 raise ItemNotFound('Block does not exist')
592 # Get the data from the block.
593 bo = self.offset % self.backend.block_size
594 bl = min(self.length, len(self.block) - bo)
595 data = self.block[bo:bo + bl]
603 if len(self.ranges) == 1:
604 return self.part_iterator()
605 if self.range_index == len(self.ranges):
608 if self.range_index == -1:
610 return self.part_iterator()
611 except StopIteration:
612 self.range_index += 1
614 if self.range_index < len(self.ranges):
616 self.offset, self.length = self.ranges[self.range_index]
618 if self.range_index > 0:
620 out.append('--' + self.boundary)
621 out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
622 out.append('Content-Transfer-Encoding: binary')
625 return '\r\n'.join(out)
629 out.append('--' + self.boundary + '--')
631 return '\r\n'.join(out)
633 def object_data_response(request, sizes, hashmaps, meta, public=False):
634 """Get the HttpResponse object for replying with the object's data."""
638 ranges = get_range(request, size)
643 check = [True for offset, length in ranges if
644 length <= 0 or length > size or
645 offset < 0 or offset >= size or
646 offset + length > size]
648 raise RangeNotSatisfiable('Requested range exceeds object limits')
650 if_range = request.META.get('HTTP_IF_RANGE')
653 # Modification time has passed instead.
654 last_modified = parse_http_date(if_range)
655 if last_modified != meta['modified']:
659 if if_range != meta['hash']:
663 if ret == 206 and len(ranges) > 1:
664 boundary = uuid.uuid4().hex
667 wrapper = ObjectWrapper(request.backend, ranges, sizes, hashmaps, boundary)
668 response = HttpResponse(wrapper, status=ret)
669 put_object_headers(response, meta, public)
672 offset, length = ranges[0]
673 response['Content-Length'] = length # Update with the correct length.
674 response['Content-Range'] = 'bytes %d-%d/%d' % (offset, offset + length - 1, size)
676 del(response['Content-Length'])
677 response['Content-Type'] = 'multipart/byteranges; boundary=%s' % (boundary,)
680 def put_object_block(request, hashmap, data, offset):
681 """Put one block of data at the given offset."""
683 bi = int(offset / request.backend.block_size)
684 bo = offset % request.backend.block_size
685 bl = min(len(data), request.backend.block_size - bo)
686 if bi < len(hashmap):
687 hashmap[bi] = request.backend.update_block(hashmap[bi], data[:bl], bo)
689 hashmap.append(request.backend.put_block(('\x00' * bo) + data[:bl]))
690 return bl # Return ammount of data written.
692 def hashmap_hash(request, hashmap):
693 """Produce the root hash, treating the hashmap as a Merkle-like tree."""
696 h = hashlib.new(request.backend.hash_algorithm)
700 if len(hashmap) == 0:
701 return hexlify(subhash(''))
702 if len(hashmap) == 1:
706 while s < len(hashmap):
708 h = [unhexlify(x) for x in hashmap]
709 h += [('\x00' * len(h[0]))] * (s - len(hashmap))
711 h = [subhash(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
714 def update_response_headers(request, response):
715 if request.serialization == 'xml':
716 response['Content-Type'] = 'application/xml; charset=UTF-8'
717 elif request.serialization == 'json':
718 response['Content-Type'] = 'application/json; charset=UTF-8'
719 elif not response['Content-Type']:
720 response['Content-Type'] = 'text/plain; charset=UTF-8'
722 if not response.has_header('Content-Length') and not (response.has_header('Content-Type') and response['Content-Type'].startswith('multipart/byteranges')):
723 response['Content-Length'] = len(response.content)
726 response['Date'] = format_date_time(time())
728 def render_fault(request, fault):
729 if settings.DEBUG or settings.TEST:
730 fault.details = format_exc(fault)
732 request.serialization = 'text'
733 data = '\n'.join((fault.message, fault.details)) + '\n'
734 response = HttpResponse(data, status=fault.code)
735 update_response_headers(request, response)
738 def request_serialization(request, format_allowed=False):
739 """Return the serialization format requested.
741 Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
744 if not format_allowed:
747 format = request.GET.get('format')
750 elif format == 'xml':
753 for item in request.META.get('HTTP_ACCEPT', '').split(','):
754 accept, sep, rest = item.strip().partition(';')
755 if accept == 'application/json':
757 elif accept == 'application/xml' or accept == 'text/xml':
762 def api_method(http_method=None, format_allowed=False):
763 """Decorator function for views that implement an API method."""
767 def wrapper(request, *args, **kwargs):
769 if http_method and request.method != http_method:
770 raise BadRequest('Method not allowed.')
772 # The args variable may contain up to (account, container, object).
773 if len(args) > 1 and len(args[1]) > 256:
774 raise BadRequest('Container name too large.')
775 if len(args) > 2 and len(args[2]) > 1024:
776 raise BadRequest('Object name too large.')
778 # Fill in custom request variables.
779 request.serialization = request_serialization(request, format_allowed)
780 request.backend = connect_backend()
782 response = func(request, *args, **kwargs)
783 update_response_headers(request, response)
786 return render_fault(request, fault)
787 except BaseException, e:
788 logger.exception('Unexpected error: %s' % e)
789 fault = ServiceUnavailable('Unexpected error')
790 return render_fault(request, fault)
792 request.backend.wrapper.conn.close()