From: Sofia Papagiannaki Date: Fri, 23 Sep 2011 12:46:26 +0000 (+0300) Subject: reorganize psend components: move HashMap & smart_upload in lib X-Git-Tag: pithos/v0.7.8~37^2 X-Git-Url: https://code.grnet.gr/git/pithos/commitdiff_plain/f390685d330756e108d640356b6908a9767c93c3 reorganize psend components: move HashMap & smart_upload in lib --- diff --git a/tools/lib/hashmap.py b/tools/lib/hashmap.py new file mode 100644 index 0000000..3f056bb --- /dev/null +++ b/tools/lib/hashmap.py @@ -0,0 +1,77 @@ +# Copyright 2011 GRNET S.A. All rights reserved. +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# 1. Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# 2. Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS +# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
import hashlib


def file_read_iterator(fp, size=1024):
    """Yield successive chunks read from the open file object `fp`.

    Each chunk is whatever `fp.read(size)` returns; iteration ends at the
    first empty read.
    """
    while True:
        data = fp.read(size)
        if not data:
            break
        yield data


class HashMap(list):
    """List of per-block digests of a file.

    The file named `f` is read in chunks of `blocksize` bytes. Each chunk has
    its trailing NUL bytes stripped and is then hashed with the `hashlib`
    algorithm named by `blockhash`; the raw digests are stored, in file
    order, as the elements of this list.
    """

    def __init__(self, f, blocksize, blockhash):
        """Hash the file at path `f` immediately and fill the list."""
        super(HashMap, self).__init__()
        self.blocksize = blocksize
        self.blockhash = blockhash
        self.load(f)

    def _hash_raw(self, v):
        """Return the raw digest of the byte string `v`."""
        h = hashlib.new(self.blockhash)
        h.update(v)
        return h.digest()

    def _hash_block(self, v):
        """Return the digest of block `v` with trailing NUL bytes removed."""
        # Byte literals are the same object type as '' on Python 2 and keep
        # the call valid when `v` is a real bytes object.
        return self._hash_raw(v.rstrip(b'\x00'))

    def hash(self):
        """Return a single digest summarising every block digest.

        An empty map yields the digest of the empty string and a one-block
        map yields that block's digest unchanged. Otherwise the list of
        digests is padded with all-zero entries (of digest length) up to the
        next power of two, and adjacent pairs are concatenated and hashed,
        level by level, until one digest is left.
        """
        if len(self) == 0:
            return self._hash_raw(b'')
        if len(self) == 1:
            return self.__getitem__(0)

        h = list(self)
        s = 2
        while s < len(h):
            s = s * 2
        h += [(b'\x00' * len(h[0]))] * (s - len(h))
        while len(h) > 1:
            h = [self._hash_raw(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
        return h[0]

    def load(self, f):
        """Append the digest of every `blocksize`-byte block of file `f`."""
        # Binary mode: the digests must describe the bytes on disk, with no
        # newline translation or text-mode end-of-file handling.
        with open(f, 'rb') as fp:
            for block in file_read_iterator(fp, self.blocksize):
                self.append(self._hash_block(block))
from hashmap import HashMap
from binascii import hexlify, unhexlify
from cStringIO import StringIO
from lib.client import Fault

import os
import sys


def smart_upload(client, file, blocksize, blockhash):
    """Upload `file` by hashmap, sending only the blocks the server asks for.

    The object is created in the 'pithos' container under the file's base
    name. A hashmap request is tried first; if it succeeds nothing else is
    sent. If it fails with a Fault whose status is 409, the blocks listed in
    the fault are uploaded one by one and the hashmap request is repeated.

    Parameters:
        client    -- object providing create_object_by_hashmap() and
                     create_object().
        file      -- path of the local file to upload.
        blocksize -- block length in bytes used for hashing and for reading
                     the blocks to upload.
        blockhash -- hashlib algorithm name used for the block digests.

    Raises:
        Fault      -- re-raised when the first request fails with a status
                      other than 409, or when any later request fails.
        ValueError -- if the fault lists a hash that is not in the local
                      hashmap.
    """
    dest_container = 'pithos'
    dest_object = os.path.split(file)[-1]

    size = os.path.getsize(file)
    # Hash the file that was passed in, not whatever sys.argv happens to hold.
    hashes = HashMap(file, blocksize, blockhash)
    hashmap = {'bytes': size, 'hashes': [hexlify(x) for x in hashes]}

    try:
        client.create_object_by_hashmap(dest_container, dest_object, hashmap)
    except Fault as fault:
        if fault.status != 409:
            raise
        # NOTE(review): the 409 body is treated as a newline-separated list
        # of hex block hashes the server is missing -- confirm against the
        # server API. Parsed here so `fault` is not needed after the handler.
        missing = fault.data.split('\n')
    else:
        return

    # Ignore everything from the first empty entry on (e.g. trailing newline).
    if '' in missing:
        del missing[missing.index(''):]

    # Binary mode so that offsets and block contents match the hashed bytes.
    with open(file, 'rb') as fp:
        for hexdigest in missing:
            offset = hashes.index(unhexlify(hexdigest)) * blocksize
            fp.seek(offset)
            block = fp.read(blocksize)
            client.create_object(dest_container, '.upload', StringIO(block))

    client.create_object_by_hashmap(dest_container, dest_object, hashmap)
a/tools/psend +++ b/tools/psend @@ -1,98 +1,20 @@ #!/usr/bin/env python -import os -import hashlib import sys +import os -from binascii import hexlify, unhexlify -from cStringIO import StringIO - -from lib.client import Pithos_Client, Fault +from lib.client import Pithos_Client from lib.util import get_user, get_auth, get_server, get_api - +from lib.transfer import smart_upload # XXX Get these from container... BLOCK_SIZE = 4 * 1024 * 1024 BLOCK_HASH = 'sha256' - -def file_read_iterator(fp, size=1024): - while True: - data = fp.read(size) - if not data: - break - yield data - - -class HashMap(list): - - def __init__(self, f): - super(HashMap, self).__init__() - self.load(f) - - def _hash_raw(self, v): - h = hashlib.new(BLOCK_HASH) - h.update(v) - return h.digest() - - def _hash_block(self, v): - return self._hash_raw(v.rstrip('\x00')) - - def hash(self): - if len(self) == 0: - return self._hash_raw('') - if len(self) == 1: - return self.__getitem__(0) - - h = list(self) - s = 2 - while s < len(h): - s = s * 2 - h += [('\x00' * len(h[0]))] * (s - len(h)) - while len(h) > 1: - h = [self._hash_raw(h[x] + h[x + 1]) for x in range(0, len(h), 2)] - return h[0] - - def load(self, f): - with open(f) as fp: - for block in file_read_iterator(fp, BLOCK_SIZE): - self.append(self._hash_block(block)) - - -def smart_upload(client, file): - dest_container = 'pithos' - dest_object = os.path.split(file)[-1] - - size = os.path.getsize(file) - hashes = HashMap(sys.argv[1]) - map = {'bytes': size, 'hashes': [hexlify(x) for x in hashes]} - - try: - client.create_object_by_hashmap(dest_container, dest_object, map) - except Fault, fault: - if fault.status != 409: - raise - else: - return - - missing = fault.data.split('\n') - if '' in missing: - del missing[missing.index(''):] - - with open(file) as fp: - for hash in missing: - offset = hashes.index(unhexlify(hash)) * BLOCK_SIZE - fp.seek(offset) - block = fp.read(BLOCK_SIZE) - client.create_object('pithos', '.upload', StringIO(block)) - 
- client.create_object_by_hashmap(dest_container, dest_object, map) - - if __name__ == '__main__': if len(sys.argv) != 2 or not os.path.isfile(sys.argv[1]): print 'syntax: %s ' % sys.argv[0] sys.exit(1) client = Pithos_Client(get_server(), get_auth(), get_user()) - smart_upload(client, sys.argv[1]) + smart_upload(client, sys.argv[1], BLOCK_SIZE, BLOCK_HASH)