1 # Copyright 2011 GRNET S.A. All rights reserved.
3 # Redistribution and use in source and binary forms, with or
4 # without modification, are permitted provided that the following
7 # 1. Redistributions of source code must retain the above
8 # copyright notice, this list of conditions and the following
11 # 2. Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following
13 # disclaimer in the documentation and/or other materials
14 # provided with the distribution.
16 # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24 # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 # POSSIBILITY OF SUCH DAMAGE.
29 # The views and conclusions contained in the software and
30 # documentation are those of the authors and should not be
31 # interpreted as representing official policies, either expressed
32 # or implied, of GRNET S.A.
34 from os import makedirs
35 from os.path import isdir, realpath, exists, join
36 from hashlib import new as newhasher
37 from binascii import hexlify
39 from context_file import ContextFile, file_sync_read_chunks
class Blocker(object):
    """Content-addressed block store backed by a filesystem directory.

    Required constructor parameters: blocksize, blockpath, hashtype.
    """
51 def __init__(self, **params):
52 blocksize = params['blocksize']
53 blockpath = params['blockpath']
54 blockpath = realpath(blockpath)
55 if not isdir(blockpath):
56 if not exists(blockpath):
59 raise ValueError("Variable blockpath '%s' is not a directory" % (blockpath,))
61 hashtype = params['hashtype']
63 hasher = newhasher(hashtype)
65 msg = "Variable hashtype '%s' is not available from hashlib"
66 raise ValueError(msg % (hashtype,))
69 emptyhash = hasher.digest()
71 self.blocksize = blocksize
72 self.blockpath = blockpath
73 self.hashtype = hashtype
74 self.hashlen = len(emptyhash)
75 self.emptyhash = emptyhash
77 def _get_rear_block(self, blkhash, create=0):
78 filename = hexlify(blkhash)
79 dir = join(self.blockpath, filename[0:2], filename[2:4], filename[4:6])
82 name = join(dir, filename)
83 return ContextFile(name, create)
85 def _check_rear_block(self, blkhash):
86 filename = hexlify(blkhash)
87 dir = join(self.blockpath, filename[0:2], filename[2:4], filename[4:6])
88 name = join(dir, filename)
91 def block_hash(self, data):
92 """Hash a block of data"""
93 hasher = newhasher(self.hashtype)
94 hasher.update(data.rstrip('\x00'))
95 return hasher.digest()
97 def block_ping(self, hashes):
98 """Check hashes for existence and
99 return those missing from block storage.
102 append = notfound.append
105 if h not in notfound and not self._check_rear_block(h):
110 def block_retr(self, hashes):
111 """Retrieve blocks from storage by their hashes."""
112 blocksize = self.blocksize
114 append = blocks.append
118 with self._get_rear_block(h, 0) as rbl:
121 for block in rbl.sync_read_chunks(blocksize, 1, 0):
122 break # there should be just one block there
129 def block_stor(self, blocklist):
130 """Store a bunch of blocks and return (hashes, missing).
131 Hashes is a list of the hashes of the blocks,
132 missing is a list of indices in that list indicating
133 which blocks were missing from the store.
135 block_hash = self.block_hash
136 hashlist = [block_hash(b) for b in blocklist]
138 missing = [i for i, h in enumerate(hashlist) if not self._check_rear_block(h)]
140 with self._get_rear_block(hashlist[i], 1) as rbl:
141 rbl.sync_write(blocklist[i]) #XXX: verify?
143 return hashlist, missing
145 def block_delta(self, blkhash, offdata=()):
146 """Construct and store a new block from a given block
147 and a list of (offset, data) 'patches'. Return:
148 (the hash of the new block, if the block already existed)
153 blocksize = self.blocksize
154 block = self.block_retr((blkhash,))
163 for off, data in offdata:
167 newblock += block[idx:off] + data
168 size += off - idx + len(data)
169 if size >= blocksize:
174 newblock += block[size:len(block)]
176 h, a = self.block_stor((newblock,))
177 return h[0], 1 if a else 0
179 def block_hash_file(self, openfile):
180 """Return the list of hashes (hashes map)
181 for the blocks in a buffered file.
182 Helper method, does not affect store.
185 append = hashes.append
186 block_hash = self.block_hash
188 for block in file_sync_read_chunks(openfile, self.blocksize, 1, 0):
189 append(block_hash(block))
193 def block_stor_file(self, openfile):
194 """Read blocks from buffered file object and store them. Return:
195 (bytes read, list of hashes, list of hashes that were missing)
197 blocksize = self.blocksize
198 block_stor = self.block_stor
200 hextend = hashlist.extend
202 sextend = storedlist.extend
205 for block in file_sync_read_chunks(openfile, blocksize, 1, 0):
206 hl, sl = block_stor((block,))
209 lastsize = len(block)
211 size = (len(hashlist) -1) * blocksize + lastsize if hashlist else 0
212 return size, hashlist, storedlist