# Copyright 2011 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
#   1. Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer.
#
#   2. Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.
34 from os import makedirs
35 from os.path import isdir, realpath, exists, join
36 from hashlib import new as newhasher
37 from binascii import hexlify
39 from context_file import ContextFile, file_sync_read_chunks
class Blocker(object):
    """Block storage mapped onto a filesystem directory tree.

    Each block is stored in a file named after the hex digest of its
    hash, fanned out over a three-level directory hierarchy built from
    the first six hex digits of that digest (aa/bb/cc/aabbcc...).

    Required constructor parameters: blocksize, blockpath, hashtype.
    """

    blocksize = None
    blockpath = None
    hashtype = None

    def __init__(self, **params):
        """Initialize the block store.

        Required keyword params:
            blocksize -- maximum size of a stored block, in bytes
            blockpath -- directory under which blocks are stored;
                         created if it does not exist
            hashtype  -- a hash algorithm name accepted by hashlib.new

        Raises ValueError if blockpath exists but is not a directory,
        or if hashtype is not available from hashlib.
        """
        blocksize = params['blocksize']
        blockpath = realpath(params['blockpath'])
        if not isdir(blockpath):
            if not exists(blockpath):
                # First use of this store: create the root directory.
                makedirs(blockpath)
            else:
                raise ValueError("Variable blockpath '%s' is not a directory" % (blockpath,))

        hashtype = params['hashtype']
        try:
            hasher = newhasher(hashtype)
        except ValueError:
            msg = "Variable hashtype '%s' is not available from hashlib"
            raise ValueError(msg % (hashtype,))

        # Digest of a fresh hasher is the hash of empty input;
        # it also fixes the digest length for this hashtype.
        emptyhash = hasher.digest()

        self.blocksize = blocksize
        self.blockpath = blockpath
        self.hashtype = hashtype
        self.hashlen = len(emptyhash)
        self.emptyhash = emptyhash

    def _block_paths(self, blkhash):
        """Return (fan-out directory, block file path) for a block hash."""
        filename = hexlify(blkhash)
        # Fan out over three directory levels keyed on the first six
        # hex digits to keep per-directory entry counts small.
        blockdir = join(self.blockpath, filename[0:2], filename[2:4], filename[4:6])
        return blockdir, join(blockdir, filename)

    def _get_rear_block(self, blkhash, create=0):
        """Open the backing file for a block hash as a ContextFile.

        When create is true the file may be created; the fan-out
        directory is made on demand either way.
        """
        blockdir, name = self._block_paths(blkhash)
        if not exists(blockdir):
            makedirs(blockdir)
        return ContextFile(name, create)

    def _check_rear_block(self, blkhash):
        """Return whether the block with the given hash is on disk."""
        _, name = self._block_paths(blkhash)
        return exists(name)

    def block_hash(self, data):
        """Hash a block of data.

        Trailing NUL padding is stripped first, so a short block and
        its zero-padded form hash identically.
        """
        hasher = newhasher(self.hashtype)
        hasher.update(data.rstrip('\x00'))
        return hasher.digest()

    def block_ping(self, hashes):
        """Check hashes for existence and
        return those missing from block storage.

        The return value is a list of indices into hashes (not the
        hash values themselves) for the blocks not found on disk.
        """
        missing = []
        append = missing.append
        for i, h in enumerate(hashes):
            if not self._check_rear_block(h):
                append(i)
        return missing

    def block_retr(self, hashes):
        """Retrieve blocks from storage by their hashes.

        Stops at the first hash whose block cannot be read, so the
        result may be shorter than the input sequence.
        """
        blocksize = self.blocksize
        blocks = []
        append = blocks.append
        block = None

        for h in hashes:
            with self._get_rear_block(h, 0) as rbl:
                if not rbl:
                    break
                for block in rbl.sync_read_chunks(blocksize, 1, 0):
                    break  # there should be just one block there
            if not block:
                break
            append(block)

        return blocks

    def block_stor(self, blocklist):
        """Store a bunch of blocks and return (hashes, missing).
        Hashes is a list of the hashes of the blocks,
        missing is a list of indices in that list indicating
        which blocks were missing from the store.

        Only the missing blocks are written; existing blocks are
        deduplicated by content hash.
        """
        block_hash = self.block_hash
        hashlist = [block_hash(b) for b in blocklist]
        missing = self.block_ping(hashlist)
        for i in missing:
            with self._get_rear_block(hashlist[i], 1) as rbl:
                rbl.sync_write(blocklist[i])  # XXX: verify?

        return hashlist, missing

    def block_delta(self, blkhash, offdata=()):
        """Construct and store a new block from a given block
        and a list of (offset, data) 'patches'. Return:
        (the hash of the new block, if the block already existed)

        An empty data in a patch truncates the block at that point.
        Returns (None, None) if the source block cannot be retrieved.
        NOTE(review): assumes patches are sorted by ascending offset
        and non-overlapping -- confirm against callers.
        """
        blocksize = self.blocksize
        block = self.block_retr((blkhash,))
        if not block:
            return None, None
        block = block[0]

        newblock = ''
        idx = 0     # read cursor into the source block
        size = 0    # bytes emitted so far; invariant: size == idx
        trunc = 0   # set when a patch truncates the block
        for off, data in offdata:
            if not data:
                trunc = 1
                break
            newblock += block[idx:off] + data
            size += off - idx + len(data)
            if size >= blocksize:
                size = blocksize
                break
            idx = off + len(data)

        if not trunc:
            # Append the unpatched tail of the source block.
            newblock += block[size:len(block)]

        h, a = self.block_stor((newblock,))
        return h[0], 1 if a else 0

    def block_hash_file(self, openfile):
        """Return the list of hashes (hashes map)
        for the blocks in a buffered file.
        Helper method, does not affect store.
        """
        hashes = []
        append = hashes.append
        block_hash = self.block_hash

        for block in file_sync_read_chunks(openfile, self.blocksize, 1, 0):
            append(block_hash(block))

        return hashes

    def block_stor_file(self, openfile):
        """Read blocks from buffered file object and store them. Return:
        (bytes read, list of hashes, list of hashes that were missing)

        NOTE(review): block_stor actually reports missing blocks as
        per-call indices, so the third element accumulates indices
        relative to each single-block call -- verify against callers.
        """
        blocksize = self.blocksize
        block_stor = self.block_stor
        hashlist = []
        hextend = hashlist.extend
        storedlist = []
        sextend = storedlist.extend
        lastsize = 0

        for block in file_sync_read_chunks(openfile, blocksize, 1, 0):
            hl, sl = block_stor((block,))
            hextend(hl)
            sextend(sl)
            lastsize = len(block)

        # All blocks but the last are full-sized; the last may be short.
        size = (len(hashlist) - 1) * blocksize + lastsize if hashlist else 0
        return size, hashlist, storedlist