========================= ================================
Revision Description
========================= ================================
-0.3 (June 10, 2011) Allow for publicly available objects via ``https://hostname/public``.
+0.3 (June 14, 2011) Large object support with ``X-Object-Manifest``.
+\ Allow for publicly available objects via ``https://hostname/public``.
\ Support time-variant account/container listings.
\ Add source version when duplicating with PUT/COPY/MOVE.
\ Request version in object HEAD/GET requests (list versions with GET).
* Time-variant account/container listings via the ``until`` parameter.
* Object versions - parameter ``version`` in HEAD/GET (list versions with GET), ``X-Object-Version-*`` meta in replies, ``X-Source-Version`` in PUT/COPY/MOVE.
* Publicly accessible objects via ``https://hostname/public``. Control with ``X-Object-Public``.
+* Large object support with ``X-Object-Manifest``.
Clarifications/suggestions:
* Container/object lists use a ``200`` return code if the reply is of type json/xml. The reply will include an empty json/xml.
* In headers, dates are formatted according to RFC 1123. In extended information listings, dates are formatted according to ISO 8601.
* The ``Last-Modified`` header value always reflects the actual latest change timestamp, regardless of time control parameters and version requests. Time precondition checks with ``If-Modified-Since`` and ``If-Unmodified-Since`` headers are applied to this value.
-* While ``X-Object-Manifest`` can be set and unset, large object support is not yet implemented (**TBD**).
+* A ``HEAD`` or ``GET`` for an ``X-Object-Manifest`` object will include modified ``Content-Length`` and ``ETag`` headers, according to the characteristics of the objects under the specified prefix. The ``ETag`` will be the MD5 hash of the corresponding ETags concatenated. In extended container listings there is no metadata processing.
The Pithos Client
-----------------
LengthRequired, PreconditionFailed, RangeNotSatisfiable, UnprocessableEntity)
from pithos.api.util import (format_meta_key, printable_meta_dict, get_account_meta,
put_account_meta, get_container_meta, put_container_meta, get_object_meta, put_object_meta,
- validate_modification_preconditions, validate_matching_preconditions, split_container_object_string,
- copy_or_move_object, get_int_parameter, get_content_length, get_content_range, raw_input_socket,
- socket_read_iterator, object_data_response, hashmap_hash, api_method)
+ update_manifest_meta, validate_modification_preconditions, validate_matching_preconditions,
+ split_container_object_string, copy_or_move_object, get_int_parameter, get_content_length,
+ get_content_range, raw_input_socket, socket_read_iterator, object_data_response,
+ hashmap_hash, api_method)
from pithos.backends import backend
except IndexError:
raise ItemNotFound('Version does not exist')
+ update_manifest_meta(request, v_account, meta)
+
response = HttpResponse(status=204)
put_object_meta(response, meta)
return response
except IndexError:
raise ItemNotFound('Version does not exist')
+ update_manifest_meta(request, v_account, meta)
+
# Evaluate conditions.
validate_modification_preconditions(request, meta)
try:
response['Content-Length'] = len(data)
return response
- try:
- size, hashmap = backend.get_object_hashmap(request.user, v_account, v_container, v_object, version)
- except NameError:
- raise ItemNotFound('Object does not exist')
- except IndexError:
- raise ItemNotFound('Version does not exist')
+ sizes = []
+ hashmaps = []
+ if 'X-Object-Manifest' in meta:
+ try:
+ src_container, src_name = split_container_object_string(meta['X-Object-Manifest'])
+ objects = backend.list_objects(request.user, v_account, src_container, prefix=src_name, virtual=False)
+ except ValueError:
+ raise BadRequest('Invalid X-Object-Manifest header')
+ except NameError:
+ raise ItemNotFound('Container does not exist')
+
+ try:
+ for x in objects:
+ s, h = backend.get_object_hashmap(request.user, v_account, src_container, x[0], x[1])
+ sizes.append(s)
+ hashmaps.append(h)
+ except NameError:
+ raise ItemNotFound('Object does not exist')
+ except IndexError:
+ raise ItemNotFound('Version does not exist')
+ else:
+ try:
+ s, h = backend.get_object_hashmap(request.user, v_account, v_container, v_object, version)
+ sizes.append(s)
+ hashmaps.append(h)
+ except NameError:
+ raise ItemNotFound('Object does not exist')
+ except IndexError:
+ raise ItemNotFound('Version does not exist')
# Reply with the hashmap.
if request.serialization != 'text':
+ size = sum(sizes)
+ hashmap = sum(hashmaps, [])
d = {'block_size': backend.block_size, 'block_hash': backend.hash_algorithm, 'bytes': size, 'hashes': hashmap}
if request.serialization == 'xml':
d['object'] = v_object
response['Content-Length'] = len(data)
return response
- return object_data_response(request, size, hashmap, meta)
+ return object_data_response(request, sizes, hashmaps, meta)
@api_method('PUT')
def object_write(request, v_account, v_container, v_object):
if k in meta:
response[k] = meta[k]
+def update_manifest_meta(request, v_account, meta):
+ """Update metadata if the object has an X-Object-Manifest."""
+
+ if 'X-Object-Manifest' in meta:
+ hash = ''
+ bytes = 0
+ try:
+ src_container, src_name = split_container_object_string(meta['X-Object-Manifest'])
+ objects = backend.list_objects(request.user, v_account, src_container, prefix=src_name, virtual=False)
+ for x in objects:
+ src_meta = backend.get_object_meta(request.user, v_account, src_container, x[0], x[1])
+ hash += src_meta['hash']
+ bytes += src_meta['bytes']
+ except:
+ # Ignore errors.
+ return
+ meta['bytes'] = bytes
+ md5 = hashlib.md5()
+ md5.update(hash)
+ meta['hash'] = md5.hexdigest().lower()
+
def validate_modification_preconditions(request, meta):
"""Check that the modified timestamp conforms with the preconditions set."""
raise NotModified('Resource Etag matches')
def split_container_object_string(s):
- parts = s.split('/')
- if len(parts) < 3 or parts[0] != '':
+ pos = s.find('/')
+ if pos == -1:
raise ValueError
- return parts[1], '/'.join(parts[2:])
+ return s[:pos], s[(pos + 1):]
def copy_or_move_object(request, v_account, src_container, src_name, dest_container, dest_name, move=False):
"""Copy or move an object."""
Read from the object using the offset and length provided in each entry of the range list.
"""
- def __init__(self, ranges, size, hashmap, boundary):
+ def __init__(self, ranges, sizes, hashmaps, boundary):
self.ranges = ranges
- self.size = size
- self.hashmap = hashmap
+ self.sizes = sizes
+ self.hashmaps = hashmaps
self.boundary = boundary
+ self.size = sum(self.sizes)
- self.block_index = -1
+ self.file_index = 0
+ self.block_index = 0
+ self.block_hash = -1
self.block = ''
self.range_index = -1
def part_iterator(self):
if self.length > 0:
- # Get the block for the current offset.
- bi = int(self.offset / backend.block_size)
- if self.block_index != bi:
+ # Get the file for the current offset.
+ file_size = self.sizes[self.file_index]
+ while self.offset >= file_size:
+ self.offset -= file_size
+ self.file_index += 1
+ file_size = self.sizes[self.file_index]
+
+ # Get the block for the current position.
+ self.block_index = int(self.offset / backend.block_size)
+ if self.block_hash != self.hashmaps[self.file_index][self.block_index]:
+ self.block_hash = self.hashmaps[self.file_index][self.block_index]
try:
- self.block = backend.get_block(self.hashmap[bi])
+ self.block = backend.get_block(self.block_hash)
except NameError:
raise ItemNotFound('Block does not exist')
- self.block_index = bi
+
# Get the data from the block.
bo = self.offset % backend.block_size
- bl = min(self.length, backend.block_size - bo)
+ bl = min(self.length, len(self.block) - bo)
data = self.block[bo:bo + bl]
self.offset += bl
self.length -= bl
if self.range_index < len(self.ranges):
# Part header.
self.offset, self.length = self.ranges[self.range_index]
+ self.file_index = 0
if self.range_index > 0:
out.append('')
out.append('--' + self.boundary)
out.append('')
return '\r\n'.join(out)
-def object_data_response(request, size, hashmap, meta, public=False):
+def object_data_response(request, sizes, hashmaps, meta, public=False):
"""Get the HttpResponse object for replying with the object's data."""
# Range handling.
+ size = sum(sizes)
ranges = get_range(request, size)
if ranges is None:
ranges = [(0, size)]
boundary = uuid.uuid4().hex
else:
boundary = ''
- wrapper = ObjectWrapper(ranges, size, hashmap, boundary)
+ wrapper = ObjectWrapper(ranges, sizes, hashmaps, boundary)
response = HttpResponse(wrapper, status=ret)
put_object_meta(response, meta, public)
if ret == 206:
from django.http import HttpResponse
from pithos.api.faults import (Fault, BadRequest, ItemNotFound)
-from pithos.api.util import (put_object_meta, validate_modification_preconditions,
- validate_matching_preconditions, object_data_response, api_method)
+from pithos.api.util import (put_object_meta, update_manifest_meta,
+ validate_modification_preconditions, validate_matching_preconditions,
+ object_data_response, api_method)
from pithos.backends import backend
if 'X-Object-Public' not in meta:
raise ItemNotFound('Object does not exist')
+ update_manifest_meta(request, v_account, meta)
response = HttpResponse(status=204)
put_object_meta(response, meta, True)
if 'X-Object-Public' not in meta:
raise ItemNotFound('Object does not exist')
+ update_manifest_meta(request, v_account, meta)
# Evaluate conditions.
validate_modification_preconditions(request, meta)
response['ETag'] = meta['hash']
return response
- try:
- size, hashmap = backend.get_object_hashmap(request.user, v_account, v_container, v_object)
- except NameError:
- raise ItemNotFound('Object does not exist')
+ sizes = []
+ hashmaps = []
+ if 'X-Object-Manifest' in meta:
+ try:
+ src_container, src_name = split_container_object_string(meta['X-Object-Manifest'])
+ objects = backend.list_objects(request.user, v_account, src_container, prefix=src_name, virtual=False)
+ except ValueError:
+ raise ItemNotFound('Object does not exist')
+ except NameError:
+ raise ItemNotFound('Object does not exist')
+
+ try:
+ for x in objects:
+ s, h = backend.get_object_hashmap(request.user, v_account, src_container, x[0], x[1])
+ sizes.append(s)
+ hashmaps.append(h)
+ except NameError:
+ raise ItemNotFound('Object does not exist')
+ else:
+ try:
+ s, h = backend.get_object_hashmap(request.user, v_account, v_container, v_object, version)
+ sizes.append(s)
+ hashmaps.append(h)
+ except NameError:
+ raise ItemNotFound('Object does not exist')
- return object_data_response(request, size, hashmap, meta, True)
+ return object_data_response(request, sizes, hashmaps, meta, True)
@api_method()
def method_not_allowed(request):