code.grnet.gr Git - pithos/blob - pithos/api/util.py

   1 # Copyright 2011 GRNET S.A. All rights reserved.
   2 #
   3 # Redistribution and use in source and binary forms, with or
   4 # without modification, are permitted provided that the following
   5 # conditions are met:
   6 #
   7 #   1. Redistributions of source code must retain the above
   8 #      copyright notice, this list of conditions and the following
   9 #      disclaimer.
  10 #
  11 #   2. Redistributions in binary form must reproduce the above
  12 #      copyright notice, this list of conditions and the following
  13 #      disclaimer in the documentation and/or other materials
  14 #      provided with the distribution.
  15 #
  16 # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
  17 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  19 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
  20 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  23 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  24 # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  26 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27 # POSSIBILITY OF SUCH DAMAGE.
  28 #
  29 # The views and conclusions contained in the software and
  30 # documentation are those of the authors and should not be
  31 # interpreted as representing official policies, either expressed
  32 # or implied, of GRNET S.A.
  33
  34 from functools import wraps
  35 from time import time
  36 from traceback import format_exc
  37 from wsgiref.handlers import format_date_time
  38 from binascii import hexlify
  39
  40 from django.conf import settings
  41 from django.http import HttpResponse
  42 from django.utils.http import http_date, parse_etags
  43
  44 from pithos.api.compat import parse_http_date_safe
  45 from pithos.api.faults import (Fault, NotModified, BadRequest, ItemNotFound, LengthRequired,
  46                                 PreconditionFailed, ServiceUnavailable)
  47 from pithos.backends import backend
  48
  49 import datetime
  50 import logging
  51 import re
  52
  53
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
  55
  56
  57 def printable_meta_dict(d):
  58     """Format a meta dictionary for printing out json/xml.
  59
  60     Convert all keys to lower case and replace dashes to underscores.
  61     Change 'modified' key from backend to 'last_modified' and format date.
  62     """
  63     if 'modified' in d:
  64         d['last_modified'] = datetime.datetime.fromtimestamp(int(d['modified'])).isoformat()
  65         del(d['modified'])
  66     return dict([(k.lower().replace('-', '_'), v) for k, v in d.iteritems()])
  67
  68 def format_meta_key(k):
  69     """Convert underscores to dashes and capitalize intra-dash strings"""
  70     return '-'.join([x.capitalize() for x in k.replace('_', '-').split('-')])
  71
  72 def get_meta_prefix(request, prefix):
  73     """Get all prefix-* request headers in a dict. Reformat keys with format_meta_key()"""
  74     prefix = 'HTTP_' + prefix.upper().replace('-', '_')
  75     return dict([(format_meta_key(k[5:]), v) for k, v in request.META.iteritems() if k.startswith(prefix)])
  76
  77 def get_account_meta(request):
  78     """Get metadata from an account request"""
  79     meta = get_meta_prefix(request, 'X-Account-Meta-')
  80     return meta
  81
  82 def put_account_meta(response, meta):
  83     """Put metadata in an account response"""
  84     response['X-Account-Container-Count'] = meta['count']
  85     response['X-Account-Bytes-Used'] = meta['bytes']
  86     if 'modified' in meta:
  87         response['Last-Modified'] = http_date(int(meta['modified']))
  88     for k in [x for x in meta.keys() if x.startswith('X-Account-Meta-')]:
  89         response[k.encode('utf-8')] = meta[k].encode('utf-8')
  90
  91 def get_container_meta(request):
  92     """Get metadata from a container request"""
  93     meta = get_meta_prefix(request, 'X-Container-Meta-')
  94     return meta
  95
  96 def put_container_meta(response, meta):
  97     """Put metadata in a container response"""
  98     response['X-Container-Object-Count'] = meta['count']
  99     response['X-Container-Bytes-Used'] = meta['bytes']
 100     if 'modified' in meta:
 101         response['Last-Modified'] = http_date(int(meta['modified']))
 102     for k in [x for x in meta.keys() if x.startswith('X-Container-Meta-')]:
 103         response[k.encode('utf-8')] = meta[k].encode('utf-8')
 104     response['X-Container-Object-Meta'] = [x[14:] for x in meta['object_meta'] if x.startswith('X-Object-Meta-')]
 105     response['X-Container-Block-Size'] = backend.block_size
 106     response['X-Container-Block-Hash'] = backend.hash_algorithm
 107
 108 def get_object_meta(request):
 109     """Get metadata from an object request"""
 110     meta = get_meta_prefix(request, 'X-Object-Meta-')
 111     if request.META.get('CONTENT_TYPE'):
 112         meta['Content-Type'] = request.META['CONTENT_TYPE']
 113     if request.META.get('HTTP_CONTENT_ENCODING'):
 114         meta['Content-Encoding'] = request.META['HTTP_CONTENT_ENCODING']
 115     if request.META.get('HTTP_CONTENT_DISPOSITION'):
 116         meta['Content-Disposition'] = request.META['HTTP_CONTENT_DISPOSITION']
 117     if request.META.get('HTTP_X_OBJECT_MANIFEST'):
 118         meta['X-Object-Manifest'] = request.META['HTTP_X_OBJECT_MANIFEST']
 119     return meta
 120
 121 def put_object_meta(response, meta):
 122     """Put metadata in an object response"""
 123     response['ETag'] = meta['hash']
 124     response['Content-Length'] = meta['bytes']
 125     response['Content-Type'] = meta.get('Content-Type', 'application/octet-stream')
 126     response['Last-Modified'] = http_date(int(meta['modified']))
 127     for k in [x for x in meta.keys() if x.startswith('X-Object-Meta-')]:
 128         response[k.encode('utf-8')] = meta[k].encode('utf-8')
 129     for k in ('Content-Encoding', 'Content-Disposition', 'X-Object-Manifest'):
 130         if k in meta:
 131             response[k] = meta[k]
 132
 133 def validate_modification_preconditions(request, meta):
 134     """Check that the modified timestamp conforms with the preconditions set"""
 135     if 'modified' not in meta:
 136         return # TODO: Always return?
 137
 138     if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
 139     if if_modified_since is not None:
 140         if_modified_since = parse_http_date_safe(if_modified_since)
 141     if if_modified_since is not None and int(meta['modified']) <= if_modified_since:
 142         raise NotModified('Object has not been modified')
 143
 144     if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
 145     if if_unmodified_since is not None:
 146         if_unmodified_since = parse_http_date_safe(if_unmodified_since)
 147     if if_unmodified_since is not None and int(meta['modified']) > if_unmodified_since:
 148         raise PreconditionFailed('Object has been modified')
 149
 150 def validate_matching_preconditions(request, meta):
 151     """Check that the ETag conforms with the preconditions set"""
 152     if 'hash' not in meta:
 153         return # TODO: Always return?
 154
 155     if_match = request.META.get('HTTP_IF_MATCH')
 156     if if_match is not None and if_match != '*':
 157         if meta['hash'] not in [x.lower() for x in parse_etags(if_match)]:
 158             raise PreconditionFailed('Object Etag does not match')
 159
 160     if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
 161     if if_none_match is not None:
 162         if if_none_match == '*' or meta['hash'] in [x.lower() for x in parse_etags(if_none_match)]:
 163             raise NotModified('Object Etag matches')
 164
 165 def copy_or_move_object(request, src_path, dest_path, move=False):
 166     """Copy or move an object"""
 167     if type(src_path) == str:
 168         parts = src_path.split('/')
 169         if len(parts) < 3 or parts[0] != '':
 170             raise BadRequest('Invalid X-Copy-From or X-Move-From header')
 171         src_container = parts[1]
 172         src_name = '/'.join(parts[2:])
 173     elif type(src_path) == tuple and len(src_path) == 2:
 174         src_container, src_name = src_path
 175     if type(dest_path) == str:
 176         parts = dest_path.split('/')
 177         if len(parts) < 3 or parts[0] != '':
 178             raise BadRequest('Invalid Destination header')
 179         dest_container = parts[1]
 180         dest_name = '/'.join(parts[2:])
 181     elif type(dest_path) == tuple and len(dest_path) == 2:
 182         dest_container, dest_name = dest_path
 183
 184     meta = get_object_meta(request)
 185     # Keep previous values of 'Content-Type' (if a new one is absent) and 'hash'.
 186     try:
 187         src_meta = backend.get_object_meta(request.user, src_container, src_name)
 188     except NameError:
 189         raise ItemNotFound('Container or object does not exist')
 190     if 'Content-Type' in meta and 'Content-Type' in src_meta:
 191         del(src_meta['Content-Type'])
 192     for k in ('Content-Type', 'hash'):
 193         if k in src_meta:
 194             meta[k] = src_meta[k]
 195
 196     try:
 197         if move:
 198             backend.move_object(request.user, src_container, src_name, dest_container, dest_name, meta, replace_meta=True)
 199         else:
 200             backend.copy_object(request.user, src_container, src_name, dest_container, dest_name, meta, replace_meta=True)
 201     except NameError:
 202         raise ItemNotFound('Container or object does not exist')
 203
 204 def get_content_length(request):
 205     content_length = request.META.get('CONTENT_LENGTH')
 206     if not content_length:
 207         raise LengthRequired('Missing Content-Length header')
 208     try:
 209         content_length = int(content_length)
 210         if content_length < 0:
 211             raise ValueError
 212     except ValueError:
 213         raise BadRequest('Invalid Content-Length header')
 214     return content_length
 215
 216 def get_range(request, size):
 217     """Parse a Range header from the request
 218
 219     Either returns None, when the header is not existent or should be ignored,
 220     or a list of (offset, length) tuples - should be further checked.
 221     """
 222     ranges = request.META.get('HTTP_RANGE', '').replace(' ', '')
 223     if not ranges.startswith('bytes='):
 224         return None
 225
 226     ret = []
 227     for r in (x.strip() for x in ranges[6:].split(',')):
 228         p = re.compile('^(?P<offset>\d*)-(?P<upto>\d*)$')
 229         m = p.match(r)
 230         if not m:
 231             return None
 232         offset = m.group('offset')
 233         upto = m.group('upto')
 234         if offset == '' and upto == '':
 235             return None
 236
 237         if offset != '':
 238             offset = int(offset)
 239             if upto != '':
 240                 upto = int(upto)
 241                 if offset > upto:
 242                     return None
 243                 ret.append((offset, upto - offset + 1))
 244             else:
 245                 ret.append((offset, size - offset))
 246         else:
 247             length = int(upto)
 248             ret.append((size - length, length))
 249
 250     return ret
 251
 252 def get_content_range(request):
 253     """Parse a Content-Range header from the request
 254
 255     Either returns None, when the header is not existent or should be ignored,
 256     or an (offset, length, total) tuple - check as length, total may be None.
 257     Returns (None, None, None) if the provided range is '*/*'.
 258     """
 259
 260     ranges = request.META.get('HTTP_CONTENT_RANGE', '')
 261     if not ranges:
 262         return None
 263
 264     p = re.compile('^bytes (?P<offset>\d+)-(?P<upto>\d*)/(?P<total>(\d+|\*))$')
 265     m = p.match(ranges)
 266     if not m:
 267         if ranges == 'bytes */*':
 268             return (None, None, None)
 269         return None
 270     offset = int(m.group('offset'))
 271     upto = m.group('upto')
 272     total = m.group('total')
 273     if upto != '':
 274         upto = int(upto)
 275     else:
 276         upto = None
 277     if total != '*':
 278         total = int(total)
 279     else:
 280         total = None
 281     if (upto and offset > upto) or \
 282         (total and offset >= total) or \
 283         (total and upto and upto >= total):
 284         return None
 285
 286     if not upto:
 287         length = None
 288     else:
 289         length = upto - offset + 1
 290     return (offset, length, total)
 291
 292 def raw_input_socket(request):
 293     """Return the socket for reading the rest of the request"""
 294     server_software = request.META.get('SERVER_SOFTWARE')
 295     if not server_software:
 296         if 'wsgi.input' in request.environ:
 297             return request.environ['wsgi.input']
 298         raise ServiceUnavailable('Unknown server software')
 299     if server_software.startswith('WSGIServer'):
 300         return request.environ['wsgi.input']
 301     elif server_software.startswith('mod_python'):
 302         return request._req
 303     raise ServiceUnavailable('Unknown server software')
 304
 305 MAX_UPLOAD_SIZE = 10 * (1024 * 1024) # 10MB
 306
 307 def socket_read_iterator(sock, length=0, blocksize=4096):
 308     """Return a maximum of blocksize data read from the socket in each iteration
 309
 310     Read up to 'length'. If 'length' is negative, will attempt a chunked read.
 311     The maximum ammount of data read is controlled by MAX_UPLOAD_SIZE.
 312     """
 313     if length < 0: # Chunked transfers
 314         data = ''
 315         while length < MAX_UPLOAD_SIZE:
 316             # Get chunk size.
 317             if hasattr(sock, 'readline'):
 318                 chunk_length = sock.readline()
 319             else:
 320                 chunk_length = ''
 321                 while chunk_length[-1:] != '\n':
 322                     chunk_length += sock.read(1)
 323                 chunk_length.strip()
 324             pos = chunk_length.find(';')
 325             if pos >= 0:
 326                 chunk_length = chunk_length[:pos]
 327             try:
 328                 chunk_length = int(chunk_length, 16)
 329             except Exception, e:
 330                 raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
 331             # Check if done.
 332             if chunk_length == 0:
 333                 if len(data) > 0:
 334                     yield data
 335                 return
 336             # Get the actual data.
 337             while chunk_length > 0:
 338                 chunk = sock.read(min(chunk_length, blocksize))
 339                 chunk_length -= len(chunk)
 340                 length += len(chunk)
 341                 data += chunk
 342                 if len(data) >= blocksize:
 343                     ret = data[:blocksize]
 344                     data = data[blocksize:]
 345                     yield ret
 346             sock.read(2) # CRLF
 347         # TODO: Raise something to note that maximum size is reached.
 348     else:
 349         if length > MAX_UPLOAD_SIZE:
 350             # TODO: Raise something to note that maximum size is reached.
 351             pass
 352         while length > 0:
 353             data = sock.read(min(length, blocksize))
 354             length -= len(data)
 355             yield data
 356
 357 class ObjectWrapper(object):
 358     """Return the object's data block-per-block in each iteration
 359
 360     Read from the object using the offset and length provided in each entry of the range list.
 361     """
 362
 363     def __init__(self, v_account, v_container, v_object, ranges, size, hashmap, boundary):
 364         self.v_account = v_account
 365         self.v_container = v_container
 366         self.v_object = v_object
 367         self.ranges = ranges
 368         self.size = size
 369         self.hashmap = hashmap
 370         self.boundary = boundary
 371
 372         self.block_index = -1
 373         self.block = ''
 374
 375         self.range_index = -1
 376         self.offset, self.length = self.ranges[0]
 377
 378     def __iter__(self):
 379         return self
 380
 381     def part_iterator(self):
 382         if self.length > 0:
 383             # Get the block for the current offset.
 384             bi = int(self.offset / backend.block_size)
 385             if self.block_index != bi:
 386                 try:
 387                     self.block = backend.get_block(self.hashmap[bi])
 388                 except NameError:
 389                     raise ItemNotFound('Block does not exist')
 390                 self.block_index = bi
 391             # Get the data from the block.
 392             bo = self.offset % backend.block_size
 393             bl = min(self.length, backend.block_size - bo)
 394             data = self.block[bo:bo + bl]
 395             self.offset += bl
 396             self.length -= bl
 397             return data
 398         else:
 399             raise StopIteration
 400
 401     def next(self):
 402         if len(self.ranges) == 1:
 403             return self.part_iterator()
 404         if self.range_index == len(self.ranges):
 405             raise StopIteration
 406         try:
 407             if self.range_index == -1:
 408                 raise StopIteration
 409             return self.part_iterator()
 410         except StopIteration:
 411             self.range_index += 1
 412             out = []
 413             if self.range_index < len(self.ranges):
 414                 # Part header.
 415                 self.offset, self.length = self.ranges[self.range_index]
 416                 if self.range_index > 0:
 417                     out.append('')
 418                 out.append('--' + self.boundary)
 419                 out.append('Content-Range: bytes %d-%d/%d' % (self.offset, self.offset + self.length - 1, self.size))
 420                 out.append('Content-Transfer-Encoding: binary')
 421                 out.append('')
 422                 out.append('')
 423                 return '\r\n'.join(out)
 424             else:
 425                 # Footer.
 426                 out.append('')
 427                 out.append('--' + self.boundary + '--')
 428                 out.append('')
 429                 return '\r\n'.join(out)
 430
 431 def hashmap_hash(hashmap):
 432     """ Produce the root hash, treating the hashmap as a Merkle-like tree."""
 433
 434     def subhash(d):
 435         h = hashlib.new(backend.hash_algorithm)
 436         h.update(d)
 437         return h.digest()
 438
 439     # TODO: Should create the whole tree and decide what to do with fillers.
 440     h = hashmap
 441     h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
 442     while len(h) > 1:
 443         h = [subhash(h[x] + (h[x + 1] if x + 1 < len(h) else '')) for x in range(0, len(h), 2)]
 444     return hexlify(h[0])
 445
 446 def update_response_headers(request, response):
 447     if request.serialization == 'xml':
 448         response['Content-Type'] = 'application/xml; charset=UTF-8'
 449     elif request.serialization == 'json':
 450         response['Content-Type'] = 'application/json; charset=UTF-8'
 451     elif not response['Content-Type']:
 452         response['Content-Type'] = 'text/plain; charset=UTF-8'
 453
 454     if settings.TEST:
 455         response['Date'] = format_date_time(time())
 456
 457 def render_fault(request, fault):
 458     if settings.DEBUG or settings.TEST:
 459         fault.details = format_exc(fault)
 460
 461     request.serialization = 'text'
 462     data = '\n'.join((fault.message, fault.details)) + '\n'
 463     response = HttpResponse(data, status=fault.code)
 464     update_response_headers(request, response)
 465     return response
 466
 467 def request_serialization(request, format_allowed=False):
 468     """Return the serialization format requested
 469
 470     Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
 471     """
 472     if not format_allowed:
 473         return 'text'
 474
 475     format = request.GET.get('format')
 476     if format == 'json':
 477         return 'json'
 478     elif format == 'xml':
 479         return 'xml'
 480
 481     for item in request.META.get('HTTP_ACCEPT', '').split(','):
 482         accept, sep, rest = item.strip().partition(';')
 483         if accept == 'application/json':
 484             return 'json'
 485         elif accept == 'application/xml' or accept == 'text/xml':
 486             return 'xml'
 487
 488     return 'text'
 489
 490 def api_method(http_method=None, format_allowed=False):
 491     """Decorator function for views that implement an API method"""
 492     def decorator(func):
 493         @wraps(func)
 494         def wrapper(request, *args, **kwargs):
 495             try:
 496                 if http_method and request.method != http_method:
 497                     raise BadRequest('Method not allowed.')
 498
 499                 # The args variable may contain up to (account, container, object).
 500                 if len(args) > 1 and len(args[1]) > 256:
 501                     raise BadRequest('Container name too large.')
 502                 if len(args) > 2 and len(args[2]) > 1024:
 503                     raise BadRequest('Object name too large.')
 504
 505                 # Fill in custom request variables.
 506                 request.serialization = request_serialization(request, format_allowed)
 507                 # TODO: Authenticate.
 508                 request.user = "test"
 509
 510                 response = func(request, *args, **kwargs)
 511                 update_response_headers(request, response)
 512                 return response
 513             except Fault, fault:
 514                 return render_fault(request, fault)
 515             except BaseException, e:
 516                 logger.exception('Unexpected error: %s' % e)
 517                 fault = ServiceUnavailable('Unexpected error')
 518                 return render_fault(request, fault)
 519         return wrapper
 520     return decorator