--- /dev/null
+# Copyright 2011 GRNET S.A. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or
+# without modification, are permitted provided that the following
+# conditions are met:
+#
+# 1. Redistributions of source code must retain the above
+# copyright notice, this list of conditions and the following
+# disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# The views and conclusions contained in the software and
+# documentation are those of the authors and should not be
+# interpreted as representing official policies, either expressed
+# or implied, of GRNET S.A.
+
+import hashlib
+
+def file_read_iterator(fp, size=1024):
+    """Yield consecutive chunks read from an open file-like object.
+
+    Calls ``fp.read(size)`` repeatedly and yields each non-empty result;
+    iteration stops at the first empty (falsy) read.
+    """
+    chunk = fp.read(size)
+    while chunk:
+        yield chunk
+        chunk = fp.read(size)
+
+class HashMap(list):
+    """List of per-block digests of a file, plus a combined top-level hash.
+
+    Each element is the raw digest of one block of at most ``blocksize``
+    bytes, computed with the ``hashlib`` algorithm named by ``blockhash``.
+    """
+
+    def __init__(self, f, blocksize, blockhash):
+        """Hash the file at path ``f`` block by block.
+
+        f         -- path of the file to read
+        blocksize -- maximum number of bytes per block
+        blockhash -- algorithm name accepted by ``hashlib.new``
+        """
+        super(HashMap, self).__init__()
+        self.blocksize = blocksize
+        self.blockhash = blockhash
+        self.load(f)
+
+    def _hash_raw(self, v):
+        """Return the raw digest of the byte string ``v``."""
+        h = hashlib.new(self.blockhash)
+        h.update(v)
+        return h.digest()
+
+    def _hash_block(self, v):
+        """Return the digest of block ``v`` with trailing NUL bytes removed."""
+        return self._hash_raw(v.rstrip(b'\x00'))
+
+    def hash(self):
+        """Return a single digest that summarises all block digests.
+
+        With no blocks this is the digest of the empty string; with one
+        block it is that block's digest.  Otherwise the digests are padded
+        with all-NUL entries up to the next power of two and reduced
+        pairwise, level by level, until one digest remains.
+        """
+        if len(self) == 0:
+            return self._hash_raw(b'')
+        if len(self) == 1:
+            return self[0]
+
+        level = list(self)
+        # Smallest power of two that can hold every block digest.
+        width = 2
+        while width < len(level):
+            width *= 2
+        # Padding entries have the same length as a real digest.
+        level += [b'\x00' * len(level[0])] * (width - len(level))
+        while len(level) > 1:
+            level = [self._hash_raw(level[i] + level[i + 1])
+                     for i in range(0, len(level), 2)]
+        return level[0]
+
+    def load(self, f):
+        """Append the digest of every block of the file at path ``f``."""
+        # Binary mode: the digests must cover the exact bytes on disk, with
+        # no newline translation on platforms that distinguish text files.
+        with open(f, 'rb') as fp:
+            for block in file_read_iterator(fp, self.blocksize):
+                self.append(self._hash_block(block))
\ No newline at end of file
--- /dev/null
+# Copyright 2011 GRNET S.A. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or
+# without modification, are permitted provided that the following
+# conditions are met:
+#
+# 1. Redistributions of source code must retain the above
+# copyright notice, this list of conditions and the following
+# disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# The views and conclusions contained in the software and
+# documentation are those of the authors and should not be
+# interpreted as representing official policies, either expressed
+# or implied, of GRNET S.A.
+
+from hashmap import HashMap
+from binascii import hexlify, unhexlify
+from cStringIO import StringIO
+from lib.client import Fault
+
+import os
+import sys
+
+def smart_upload(client, file, blocksize, blockhash):
+    """Upload ``file`` by hashmap, sending individual blocks only on demand.
+
+    The object is first created from the list of block hashes alone.  If
+    that request raises a Fault with status 409, the fault data is read as
+    a newline-separated list of hex block hashes (presumably the blocks the
+    server does not have yet -- confirm against the server API); each listed
+    block is uploaded and the hashmap request is then repeated.
+
+    client    -- object providing create_object_by_hashmap() and
+                 create_object()
+    file      -- path of the local file to upload
+    blocksize -- block size in bytes used to split the file
+    blockhash -- hashlib algorithm name used for the block hashes
+
+    Any Fault whose status is not 409 is re-raised.
+    """
+    dest_container = 'pithos'
+    dest_object = os.path.basename(file)
+
+    size = os.path.getsize(file)
+    # Hash the file that was passed in, not whatever is on the command line.
+    hashes = HashMap(file, blocksize, blockhash)
+    hashmap = {'bytes': size, 'hashes': [hexlify(digest) for digest in hashes]}
+
+    try:
+        client.create_object_by_hashmap(dest_container, dest_object, hashmap)
+    except Fault as fault:
+        if fault.status != 409:
+            raise
+        # Read the fault inside the handler; the name is not relied on later.
+        missing = fault.data.split('\n')
+    else:
+        return
+
+    # Everything from the first empty entry onwards is discarded.
+    if '' in missing:
+        del missing[missing.index(''):]
+
+    # Binary mode so that offsets and block contents match the hashed bytes.
+    with open(file, 'rb') as fp:
+        for hex_digest in missing:
+            offset = hashes.index(unhexlify(hex_digest)) * blocksize
+            fp.seek(offset)
+            block = fp.read(blocksize)
+            client.create_object(dest_container, '.upload', StringIO(block))
+
+    client.create_object_by_hashmap(dest_container, dest_object, hashmap)
\ No newline at end of file
#!/usr/bin/env python
-import os
-import hashlib
import sys
+import os
-from binascii import hexlify, unhexlify
-from cStringIO import StringIO
-
-from lib.client import Pithos_Client, Fault
+from lib.client import Pithos_Client
from lib.util import get_user, get_auth, get_server, get_api
-
+from lib.transfer import smart_upload
# XXX Get these from container...
BLOCK_SIZE = 4 * 1024 * 1024
BLOCK_HASH = 'sha256'
-
-def file_read_iterator(fp, size=1024):
- while True:
- data = fp.read(size)
- if not data:
- break
- yield data
-
-
-class HashMap(list):
-
- def __init__(self, f):
- super(HashMap, self).__init__()
- self.load(f)
-
- def _hash_raw(self, v):
- h = hashlib.new(BLOCK_HASH)
- h.update(v)
- return h.digest()
-
- def _hash_block(self, v):
- return self._hash_raw(v.rstrip('\x00'))
-
- def hash(self):
- if len(self) == 0:
- return self._hash_raw('')
- if len(self) == 1:
- return self.__getitem__(0)
-
- h = list(self)
- s = 2
- while s < len(h):
- s = s * 2
- h += [('\x00' * len(h[0]))] * (s - len(h))
- while len(h) > 1:
- h = [self._hash_raw(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
- return h[0]
-
- def load(self, f):
- with open(f) as fp:
- for block in file_read_iterator(fp, BLOCK_SIZE):
- self.append(self._hash_block(block))
-
-
-def smart_upload(client, file):
- dest_container = 'pithos'
- dest_object = os.path.split(file)[-1]
-
- size = os.path.getsize(file)
- hashes = HashMap(sys.argv[1])
- map = {'bytes': size, 'hashes': [hexlify(x) for x in hashes]}
-
- try:
- client.create_object_by_hashmap(dest_container, dest_object, map)
- except Fault, fault:
- if fault.status != 409:
- raise
- else:
- return
-
- missing = fault.data.split('\n')
- if '' in missing:
- del missing[missing.index(''):]
-
- with open(file) as fp:
- for hash in missing:
- offset = hashes.index(unhexlify(hash)) * BLOCK_SIZE
- fp.seek(offset)
- block = fp.read(BLOCK_SIZE)
- client.create_object('pithos', '.upload', StringIO(block))
-
- client.create_object_by_hashmap(dest_container, dest_object, map)
-
-
if __name__ == '__main__':
if len(sys.argv) != 2 or not os.path.isfile(sys.argv[1]):
print 'syntax: %s <file>' % sys.argv[0]
sys.exit(1)
client = Pithos_Client(get_server(), get_auth(), get_user())
- smart_upload(client, sys.argv[1])
+ # Block size and hash algorithm are passed explicitly as arguments.
+ smart_upload(client, sys.argv[1], BLOCK_SIZE, BLOCK_HASH)