1 # Copyright 2011-2013 GRNET S.A. All rights reserved.
3 # Redistribution and use in source and binary forms, with or
4 # without modification, are permitted provided that the following
7 # 1. Redistributions of source code must retain the above
8 # copyright notice, this list of conditions and the following
11 # 2. Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following
13 # disclaimer in the documentation and/or other materials
14 # provided with the distribution.
16 # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24 # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 # POSSIBILITY OF SUCH DAMAGE.
29 # The views and conclusions contained in the software and
30 # documentation are those of the authors and should not be
31 # interpreted as representing official policies, either expressed
32 # or implied, of GRNET S.A.
34 from threading import enumerate as activethreads
37 from hashlib import new as newhashlib
40 from binascii import hexlify
42 from kamaki.clients import SilentEvent, sendlog
43 from kamaki.clients.pithos.rest_api import PithosRestClient
44 from kamaki.clients.storage import ClientError
45 from kamaki.clients.utils import path4url, filter_in
46 from StringIO import StringIO
49 def _pithos_hash(block, blockhash):
50 h = newhashlib(blockhash)
51 h.update(block.rstrip('\x00'))
55 def _range_up(start, end, a_range):
57 (rstart, rend) = a_range.split('-')
58 (rstart, rend) = (int(rstart), int(rend))
59 if rstart > end or rend < start:
68 class PithosClient(PithosRestClient):
69 """Synnefo Pithos+ API client"""
def __init__(self, base_url, token, account=None, container=None):
    """Set up the client by delegating to the PithosRestClient constructor

    :param base_url: passed unchanged to the parent constructor

    :param token: passed unchanged to the parent constructor

    :param account: passed unchanged to the parent constructor

    :param container: passed unchanged to the parent constructor
    """
    parent = super(PithosClient, self)
    parent.__init__(base_url, token, account, container)
74 def purge_container(self, container=None):
75 """Delete an empty container and destroy associated blocks
77 cnt_back_up = self.container
79 self.container = container or cnt_back_up
80 self.container_delete(until=unicode(time()))
82 self.container = cnt_back_up
84 def upload_object_unchunked(
89 content_encoding=None,
90 content_disposition=None,
95 :param obj: (str) remote object path
97 :param f: open file descriptor
99 :param withHashFile: (bool)
101 :param size: (int) size of data to upload
105 :param content_encoding: (str)
107 :param content_disposition: (str)
109 :param content_type: (str)
111 :param sharing: {'read':[user and/or grp names],
112 'write':[usr and/or grp names]}
114 :param public: (bool)
116 :returns: (dict) created object metadata
118 self._assert_container()
124 data = json.dumps(json.loads(data))
126 raise ClientError('"%s" is not json-formated' % f.name, 1)
128 msg = '"%s" is not a valid hashmap file' % f.name
129 raise ClientError(msg, 1)
132 data = f.read(size) if size else f.read()
137 content_encoding=content_encoding,
138 content_disposition=content_disposition,
139 content_type=content_type,
145 def create_object_by_manifestation(
148 content_encoding=None,
149 content_disposition=None,
154 :param obj: (str) remote object path
158 :param content_encoding: (str)
160 :param content_disposition: (str)
162 :param content_type: (str)
164 :param sharing: {'read':[user and/or grp names],
165 'write':[usr and/or grp names]}
167 :param public: (bool)
169 :returns: (dict) created object metadata
171 self._assert_container()
176 content_encoding=content_encoding,
177 content_disposition=content_disposition,
178 content_type=content_type,
181 manifest='%s/%s' % (self.container, obj))
184 # upload_* auxiliary methods
185 def _put_block_async(self, data, hash, upload_gen=None):
186 event = SilentEvent(method=self._put_block, data=data, hash=hash)
190 def _put_block(self, data, hash):
191 r = self.container_post(
193 content_type='application/octet-stream',
194 content_length=len(data),
197 assert r.json[0] == hash, 'Local hash does not match server'
199 def _get_file_block_info(self, fileobj, size=None, cache=None):
201 :param fileobj: (file descriptor) source
203 :param size: (int) size of data to upload from source
205 :param cache: (dict) if provided, cache container info response to
206 avoid redundant calls
208 if isinstance(cache, dict):
210 meta = cache[self.container]
212 meta = self.get_container_info()
213 cache[self.container] = meta
215 meta = self.get_container_info()
216 blocksize = int(meta['x-container-block-size'])
217 blockhash = meta['x-container-block-hash']
218 size = size if size is not None else fstat(fileobj.fileno()).st_size
219 nblocks = 1 + (size - 1) // blocksize
220 return (blocksize, blockhash, size, nblocks)
222 def _create_or_get_missing_hashes(
229 if_etag_not_match=None,
230 content_encoding=None,
231 content_disposition=None,
239 content_type=content_type,
241 if_etag_match=if_etag_match,
242 if_etag_not_match=if_etag_not_match,
243 content_encoding=content_encoding,
244 content_disposition=content_disposition,
245 permissions=permissions,
248 return (None if r.status_code == 201 else r.json), r.headers
250 def _calculate_blocks_for_upload(
251 self, blocksize, blockhash, size, nblocks, hashes, hmap, fileobj,
255 hash_gen = hash_cb(nblocks)
258 for i in range(nblocks):
259 block = fileobj.read(min(blocksize, size - offset))
261 hash = _pithos_hash(block, blockhash)
263 hmap[hash] = (offset, bytes)
267 msg = 'Failed to calculate uploaded blocks:'
268 ' Offset and object size do not match'
269 assert offset == size, msg
271 def _upload_missing_blocks(self, missing, hmap, fileobj, upload_gen=None):
272 """upload missing blocks asynchronously"""
274 self._init_thread_limit()
279 offset, bytes = hmap[hash]
281 data = fileobj.read(bytes)
282 r = self._put_block_async(data, hash, upload_gen)
284 unfinished = self._watch_thread_limit(flying)
285 for thread in set(flying).difference(unfinished):
287 failures.append(thread)
290 ClientError) and thread.exception.status == 502:
291 self.POOLSIZE = self._thread_limit
292 elif thread.isAlive():
293 flying.append(thread)
301 for thread in flying:
304 failures.append(thread)
311 return [failure.kwargs['hash'] for failure in failures]
321 content_encoding=None,
322 content_disposition=None,
326 container_info_cache=None):
327 """Upload an object using multiple connections (threads)
329 :param obj: (str) remote object path
331 :param f: open file descriptor (rb)
333 :param hash_cb: optional progress.bar object for calculating hashes
335 :param upload_cb: optional progress.bar object for uploading
339 :param if_etag_match: (str) Push that value to if-match header at file
342 :param if_not_exist: (bool) If true, the file will be uploaded ONLY if
343 it does not exist remotely, otherwise the operation will fail.
344 Covers the case where an object with the same path is created while
345 the object is being uploaded.
347 :param content_encoding: (str)
349 :param content_disposition: (str)
351 :param content_type: (str)
353 :param sharing: {'read':[user and/or grp names],
354 'write':[usr and/or grp names]}
356 :param public: (bool)
358 :param container_info_cache: (dict) if given, avoid redundant calls to
359 server for container info (block size and hash information)
361 self._assert_container()
365 blocksize, blockhash, size, nblocks) = self._get_file_block_info(
366 f, size, container_info_cache)
367 (hashes, hmap, offset) = ([], {}, 0)
369 content_type = 'application/octet-stream'
371 self._calculate_blocks_for_upload(
378 hashmap = dict(bytes=size, hashes=hashes)
379 missing, obj_headers = self._create_or_get_missing_hashes(
381 content_type=content_type,
383 if_etag_match=if_etag_match,
384 if_etag_not_match='*' if if_not_exist else None,
385 content_encoding=content_encoding,
386 content_disposition=content_disposition,
394 upload_gen = upload_cb(len(missing))
395 for i in range(len(missing), len(hashmap['hashes']) + 1):
406 sendlog.info('%s blocks missing' % len(missing))
407 num_of_blocks = len(missing)
408 missing = self._upload_missing_blocks(
414 if num_of_blocks == len(missing):
417 num_of_blocks = len(missing)
422 '%s blocks failed to upload' % len(missing),
424 except KeyboardInterrupt:
425 sendlog.info('- - - wait for threads to finish')
426 for thread in activethreads():
434 content_type=content_type,
435 if_etag_match=if_etag_match,
436 if_etag_not_match='*' if if_not_exist else None,
444 # download_* auxiliary methods
445 def _get_remote_blocks_info(self, obj, **restargs):
446 #retrieve object hashmap
447 myrange = restargs.pop('data_range', None)
448 hashmap = self.get_object_hashmap(obj, **restargs)
449 restargs['data_range'] = myrange
450 blocksize = int(hashmap['block_size'])
451 blockhash = hashmap['block_hash']
452 total_size = hashmap['bytes']
453 #assert total_size/blocksize + 1 == len(hashmap['hashes'])
455 for i, h in enumerate(hashmap['hashes']):
456 # map_dict[h] = i CHAGE
458 map_dict[h].append(i)
461 return (blocksize, blockhash, total_size, hashmap['hashes'], map_dict)
463 def _dump_blocks_sync(
464 self, obj, remote_hashes, blocksize, total_size, dst, range,
466 for blockid, blockhash in enumerate(remote_hashes):
468 start = blocksize * blockid
469 is_last = start + blocksize > total_size
470 end = (total_size - 1) if is_last else (start + blocksize - 1)
471 (start, end) = _range_up(start, end, range)
472 args['data_range'] = 'bytes=%s-%s' % (start, end)
473 r = self.object_get(obj, success=(200, 206), **args)
478 def _get_block_async(self, obj, **args):
479 event = SilentEvent(self.object_get, obj, success=(200, 206), **args)
483 def _hash_from_file(self, fp, start, size, blockhash):
485 block = fp.read(size)
486 h = newhashlib(blockhash)
487 h.update(block.strip('\x00'))
488 return hexlify(h.digest())
490 def _thread2file(self, flying, blockids, local_file, offset=0, **restargs):
491 """write the results of a threaded rest call to a file
493 :param offset: the offset of the file up to blocksize
494 - e.g. if the range is 10-100, all blocks will be written to
497 for i, (key, g) in enumerate(flying.items()):
502 block = g.value.content
503 for block_start in blockids[key]:
504 local_file.seek(block_start + offset)
505 local_file.write(block)
511 def _dump_blocks_async(
512 self, obj, remote_hashes, blocksize, total_size, local_file,
513 blockhash=None, resume=False, filerange=None, **restargs):
514 file_size = fstat(local_file.fileno()).st_size if resume else 0
516 blockid_dict = dict()
518 if filerange is not None:
519 rstart = int(filerange.split('-')[0])
520 offset = rstart if blocksize > rstart else rstart % blocksize
522 self._init_thread_limit()
523 for block_hash, blockids in remote_hashes.items():
524 blockids = [blk * blocksize for blk in blockids]
525 unsaved = [blk for blk in blockids if not (
526 blk < file_size and block_hash == self._hash_from_file(
527 local_file, blk, blocksize, blockhash))]
528 self._cb_next(len(blockids) - len(unsaved))
531 self._watch_thread_limit(flying.values())
533 flying, blockid_dict, local_file, offset,
535 end = total_size - 1 if key + blocksize > total_size\
536 else key + blocksize - 1
537 start, end = _range_up(key, end, filerange)
541 restargs['async_headers'] = {
542 'Range': 'bytes=%s-%s' % (start, end)}
543 flying[key] = self._get_block_async(obj, **restargs)
544 blockid_dict[key] = unsaved
546 for thread in flying.values():
548 self._thread2file(flying, blockid_dict, local_file, offset, **restargs)
558 if_modified_since=None,
559 if_unmodified_since=None):
560 """Download an object (multiple connections, random blocks)
562 :param obj: (str) remote object path
564 :param dst: open file descriptor (wb+)
566 :param download_cb: optional progress.bar object for downloading
568 :param version: (str) file version
570 :param resume: (bool) if set, preserve already downloaded file parts
572 :param range_str: (str) from, to are file positions (int) in bytes
574 :param if_match: (str)
576 :param if_none_match: (str)
578 :param if_modified_since: (str) formatted date
580 :param if_unmodified_since: (str) formatted date"""
583 data_range=None if range_str is None else 'bytes=%s' % range_str,
585 if_none_match=if_none_match,
586 if_modified_since=if_modified_since,
587 if_unmodified_since=if_unmodified_since)
594 remote_hashes) = self._get_remote_blocks_info(obj, **restargs)
595 assert total_size >= 0
598 self.progress_bar_gen = download_cb(len(hash_list))
602 self._dump_blocks_sync(
611 self._dump_blocks_async(
622 dst.truncate(total_size)
626 #Command Progress Bar method
627 def _cb_next(self, step=1):
628 if hasattr(self, 'progress_bar_gen'):
630 for i in xrange(step):
631 self.progress_bar_gen.next()
635 def _complete_cb(self):
638 self.progress_bar_gen.next()
642 def get_object_hashmap(
647 if_modified_since=None,
648 if_unmodified_since=None,
651 :param obj: (str) remote object path
653 :param if_match: (str)
655 :param if_none_match: (str)
657 :param if_modified_since: (str) formatted date
659 :param if_unmodified_since: (str) formatted date
661 :param data_range: (str) from-to where from and to are integers
662 denoting file positions in bytes
671 if_etag_match=if_match,
672 if_etag_not_match=if_none_match,
673 if_modified_since=if_modified_since,
674 if_unmodified_since=if_unmodified_since,
675 data_range=data_range)
676 except ClientError as err:
677 if err.status == 304 or err.status == 412:
def set_account_group(self, group, usernames):
    """Post the member list of one account group (update mode)

    :param group: (str) group name, used as the key of the posted mapping

    :param usernames: (list) members to post for that group
    """
    membership = {group: usernames}
    self.account_post(update=True, groups=membership)
def del_account_group(self, group):
    """Post an empty member list for an account group (update mode)

    :param group: (str) group name
    """
    # NOTE(review): presumably the server drops a group whose member list
    # is empty - confirm against the Pithos API.
    emptied = {group: []}
    self.account_post(update=True, groups=emptied)
696 def get_account_info(self, until=None):
698 :param until: (str) formatted date
702 r = self.account_head(until=until)
703 if r.status_code == 401:
704 raise ClientError("No authorization", status=401)
707 def get_account_quota(self):
712 self.get_account_info(),
713 'X-Account-Policy-Quota',
716 def get_account_versioning(self):
721 self.get_account_info(),
722 'X-Account-Policy-Versioning',
def get_account_meta(self, until=None):
    """Select the 'X-Account-Meta-' entries of the account info

    :param until: (str) formatted date, forwarded to get_account_info

    :returns: whatever filter_in keeps from the account info for the
        'X-Account-Meta-' prefix
    """
    account_info = self.get_account_info(until=until)
    return filter_in(account_info, 'X-Account-Meta-')
def get_account_group(self):
    """Select the 'X-Account-Group-' entries of the account info

    :returns: whatever filter_in keeps from the account info for the
        'X-Account-Group-' prefix
    """
    account_info = self.get_account_info()
    return filter_in(account_info, 'X-Account-Group-')
def set_account_meta(self, metapairs):
    """Post metadata for the account (update mode)

    :param metapairs: (dict) {key1:val1, key2:val2, ...}

    :raises AssertionError: if metapairs is not a dict
    """
    # isinstance also accepts dict subclasses (e.g. OrderedDict), which the
    # former exact-type comparison rejected. The check stays an assert so
    # that callers catching AssertionError keep working.
    assert isinstance(metapairs, dict), 'metapairs must be a dict'
    self.account_post(update=True, metadata=metapairs)
def del_account_meta(self, metakey):
    """Post an empty value for one account metadatum (update mode)

    :param metakey: (str) metadatum key
    """
    # NOTE(review): presumably an empty value makes the server remove the
    # key - confirm against the Pithos API.
    blanked = {metakey: ''}
    self.account_post(update=True, metadata=blanked)
753 def set_account_quota(self, quota):
757 self.account_post(update=True, quota=quota)
760 def set_account_versioning(self, versioning):
762 :param versioning: (str)
764 self.account_post(update=True, versioning=versioning)
766 def list_containers(self):
770 r = self.account_get()
773 def del_container(self, until=None, delimiter=None):
775 :param until: (str) formatted date
777 :param delimiter: (str) with / empty container
779 :raises ClientError: 404 Container does not exist
781 :raises ClientError: 409 Container is not empty
783 self._assert_container()
784 r = self.container_delete(
787 success=(204, 404, 409))
788 if r.status_code == 404:
790 'Container "%s" does not exist' % self.container,
792 elif r.status_code == 409:
794 'Container "%s" is not empty' % self.container,
797 def get_container_versioning(self, container=None):
799 :param container: (str)
803 cnt_back_up = self.container
805 self.container = container or cnt_back_up
807 self.get_container_info(),
808 'X-Container-Policy-Versioning')
810 self.container = cnt_back_up
812 def get_container_limit(self, container=None):
814 :param container: (str)
818 cnt_back_up = self.container
820 self.container = container or cnt_back_up
822 self.get_container_info(),
823 'X-Container-Policy-Quota')
825 self.container = cnt_back_up
827 def get_container_info(self, until=None):
829 :param until: (str) formatted date
833 :raises ClientError: 404 Container not found
836 r = self.container_head(until=until)
837 except ClientError as err:
838 err.details.append('for container %s' % self.container)
842 def get_container_meta(self, until=None):
844 :param until: (str) formatted date
849 self.get_container_info(until=until),
852 def get_container_object_meta(self, until=None):
854 :param until: (str) formatted date
859 self.get_container_info(until=until),
860 'X-Container-Object-Meta')
def set_container_meta(self, metapairs):
    """Post metadata for the current container (update mode)

    :param metapairs: (dict) {key1:val1, key2:val2, ...}

    :raises AssertionError: if metapairs is not a dict
    """
    # isinstance also accepts dict subclasses (e.g. OrderedDict), which the
    # former exact-type comparison rejected. The check stays an assert so
    # that callers catching AssertionError keep working.
    assert isinstance(metapairs, dict), 'metapairs must be a dict'
    self.container_post(update=True, metadata=metapairs)
def del_container_meta(self, metakey):
    """Post an empty value for one container metadatum (update mode)

    :param metakey: (str) metadatum key
    """
    # NOTE(review): presumably an empty value makes the server remove the
    # key - confirm against the Pithos API.
    blanked = {metakey: ''}
    self.container_post(update=True, metadata=blanked)
def set_container_limit(self, limit):
    """Post a new limit for the current container (update mode)

    :param limit: value sent as the ``quota`` argument of container_post
    """
    post_args = dict(update=True, quota=limit)
    self.container_post(**post_args)
def set_container_versioning(self, versioning):
    """Post a versioning setting for the current container (update mode)

    :param versioning: (str)
    """
    post_args = dict(update=True, versioning=versioning)
    self.container_post(**post_args)
def del_object(self, obj, until=None, delimiter=None):
    """Issue a delete request for an object of the current container

    :param obj: (str) remote object path

    :param until: (str) formatted date

    :param delimiter: (str)
    """
    # Checked first, so nothing is sent when no container is set
    self._assert_container()
    delete_args = dict(until=until, delimiter=delimiter)
    self.object_delete(obj, **delete_args)
def set_object_meta(self, obj, metapairs):
    """Post metadata for an object (update mode)

    :param obj: (str) remote object path

    :param metapairs: (dict) {key1:val1, key2:val2, ...}

    :raises AssertionError: if metapairs is not a dict
    """
    # isinstance also accepts dict subclasses (e.g. OrderedDict), which the
    # former exact-type comparison rejected. The check stays an assert so
    # that callers catching AssertionError keep working.
    assert isinstance(metapairs, dict), 'metapairs must be a dict'
    self.object_post(obj, update=True, metadata=metapairs)
def del_object_meta(self, obj, metakey):
    """Post an empty value for one object metadatum (update mode)

    :param obj: (str) remote object path

    :param metakey: (str) metadatum key
    """
    # NOTE(review): presumably an empty value makes the server remove the
    # key - confirm against the Pithos API.
    blanked = {metakey: ''}
    self.object_post(obj, update=True, metadata=blanked)
def publish_object(self, obj):
    """Mark an object as public and compose its access url

    :param obj: (str) remote object path

    :returns: (str) access url
    """
    self.object_post(obj, update=True, public=True)
    obj_info = self.get_object_info(obj)
    # Keep what precedes the first '//' of base_url, the '//' itself and
    # the part of the remainder up to its first '/'; the rest of base_url
    # is dropped.
    scheme, slashes, remainder = self.base_url.partition('//')
    host = remainder.partition('/')[0]
    return '%s%s%s/%s' % (scheme, slashes, host, obj_info['x-object-public'])
def unpublish_object(self, obj):
    """Post public=False for an object (update mode)

    :param obj: (str) remote object path
    """
    post_args = dict(update=True, public=False)
    self.object_post(obj, **post_args)
933 def get_object_info(self, obj, version=None):
935 :param obj: (str) remote object path
937 :param version: (str)
942 r = self.object_head(obj, version=version)
944 except ClientError as ce:
946 raise ClientError('Object %s not found' % obj, status=404)
949 def get_object_meta(self, obj, version=None):
951 :param obj: (str) remote object path
953 :param version: (str)
958 self.get_object_info(obj, version=version),
961 def get_object_sharing(self, obj):
963 :param obj: (str) remote object path
968 self.get_object_info(obj),
973 perms = r['x-object-sharing'].split(';')
978 raise ClientError('Incorrect reply format')
979 (key, val) = perm.strip().split('=')
983 def set_object_sharing(
985 read_permition=False, write_permition=False):
986 """Give read/write permissions to an object.
988 :param obj: (str) remote object path
990 :param read_permition: (list - bool) users and user groups that get
991 read permission for this object - False means all previous read
992 permissions will be removed
994 :param write_permition: (list - bool) of users and user groups to get
995 write permission for this object - False means all previous write
996 permissions will be removed
999 perms = dict(read=read_permition or '', write=write_permition or '')
1000 self.object_post(obj, update=True, permissions=perms)
def del_object_sharing(self, obj):
    """Reset the sharing of an object

    Calls set_object_sharing with its default arguments, i.e. without any
    read or write permission.

    :param obj: (str) remote object path
    """
    self.set_object_sharing(obj)
1008 def append_object(self, obj, source_file, upload_cb=None):
1010 :param obj: (str) remote object path
1012 :param source_file: open file descriptor
1014 :param upload_cb: progress.bar for uploading
1017 self._assert_container()
1018 meta = self.get_container_info()
1019 blocksize = int(meta['x-container-block-size'])
1020 filesize = fstat(source_file.fileno()).st_size
1021 nblocks = 1 + (filesize - 1) // blocksize
1024 upload_gen = upload_cb(nblocks)
1026 for i in range(nblocks):
1027 block = source_file.read(min(blocksize, filesize - offset))
1028 offset += len(block)
1032 content_range='bytes */*',
1033 content_type='application/octet-stream',
1034 content_length=len(block),
1040 def truncate_object(self, obj, upto_bytes):
1042 :param obj: (str) remote object path
1044 :param upto_bytes: max number of bytes to leave on file
1049 content_range='bytes 0-%s/*' % upto_bytes,
1050 content_type='application/octet-stream',
1051 object_bytes=upto_bytes,
1052 source_object=path4url(self.container, obj))
1054 def overwrite_object(self, obj, start, end, source_file, upload_cb=None):
1055 """Overwrite a part of an object from local source file
1057 :param obj: (str) remote object path
1059 :param start: (int) position in bytes to start overwriting from
1061 :param end: (int) position in bytes to stop overwriting at
1063 :param source_file: open file descriptor
1065 :param upload_cb: progress.bar for uploading
1068 r = self.get_object_info(obj)
1069 rf_size = int(r['content-length'])
1070 if rf_size < int(start):
1072 'Range start exceeds file size',
1074 elif rf_size < int(end):
1076 'Range end exceeds file size',
1078 self._assert_container()
1079 meta = self.get_container_info()
1080 blocksize = int(meta['x-container-block-size'])
1081 filesize = fstat(source_file.fileno()).st_size
1082 datasize = int(end) - int(start) + 1
1083 nblocks = 1 + (datasize - 1) // blocksize
1086 upload_gen = upload_cb(nblocks)
1088 for i in range(nblocks):
1089 read_size = min(blocksize, filesize - offset, datasize - offset)
1090 block = source_file.read(read_size)
1094 content_type='application/octet-stream',
1095 content_length=len(block),
1096 content_range='bytes %s-%s/*' % (
1098 start + offset + len(block) - 1),
1100 offset += len(block)
1106 self, src_container, src_object, dst_container,
1108 source_version=None,
1109 source_account=None,
1114 :param src_container: (str) source container
1116 :param src_object: (str) source object path
1118 :param dst_container: (str) destination container
1120 :param dst_object: (str) destination object path
1122 :param source_version: (str) source object version
1124 :param source_account: (str) account to copy from
1126 :param public: (bool)
1128 :param content_type: (str)
1130 :param delimiter: (str)
1132 self._assert_account()
1133 self.container = dst_container
1134 src_path = path4url(src_container, src_object)
1136 dst_object or src_object,
1140 source_version=source_version,
1141 source_account=source_account,
1143 content_type=content_type,
1144 delimiter=delimiter)
1147 self, src_container, src_object, dst_container,
1149 source_account=None,
1150 source_version=None,
1155 :param src_container: (str) source container
1157 :param src_object: (str) source object path
1159 :param dst_container: (str) destination container
1161 :param dst_object: (str) destination object path
1163 :param source_account: (str) account to move from
1165 :param source_version: (str) source object version
1167 :param public: (bool)
1169 :param content_type: (str)
1171 :param delimiter: (str)
1173 self._assert_account()
1174 self.container = dst_container
1175 dst_object = dst_object or src_object
1176 src_path = path4url(src_container, src_object)
1182 source_account=source_account,
1183 source_version=source_version,
1185 content_type=content_type,
1186 delimiter=delimiter)
1188 def get_sharing_accounts(self, limit=None, marker=None, *args, **kwargs):
1189 """Get accounts that share with self.account
1193 :param marker: (str)
1197 self._assert_account()
1199 self.set_param('format', 'json')
1200 self.set_param('limit', limit, iff=limit is not None)
1201 self.set_param('marker', marker, iff=marker is not None)
1204 success = kwargs.pop('success', (200, 204))
1205 r = self.get(path, *args, success=success, **kwargs)
def get_object_versionlist(self, obj):
    """Fetch the version list of an object

    :param obj: (str) remote object path

    :returns: the 'versions' entry of the json reply
    """
    # Checked first, so nothing is requested when no container is set
    self._assert_container()
    reply = self.object_get(obj, format='json', version='list')
    versions = reply.json['versions']
    return versions