root / pithos / backends / lib / hashfiler / blocker.py @ f897bea9
History | View | Annotate | Download (7.1 kB)
1 | a9b3f29d | Antony Chazapis | # Copyright 2011 GRNET S.A. All rights reserved.
|
---|---|---|---|
2 | a9b3f29d | Antony Chazapis | #
|
3 | a9b3f29d | Antony Chazapis | # Redistribution and use in source and binary forms, with or
|
4 | a9b3f29d | Antony Chazapis | # without modification, are permitted provided that the following
|
5 | a9b3f29d | Antony Chazapis | # conditions are met:
|
6 | a9b3f29d | Antony Chazapis | #
|
7 | a9b3f29d | Antony Chazapis | # 1. Redistributions of source code must retain the above
|
8 | a9b3f29d | Antony Chazapis | # copyright notice, this list of conditions and the following
|
9 | a9b3f29d | Antony Chazapis | # disclaimer.
|
10 | a9b3f29d | Antony Chazapis | #
|
11 | a9b3f29d | Antony Chazapis | # 2. Redistributions in binary form must reproduce the above
|
12 | a9b3f29d | Antony Chazapis | # copyright notice, this list of conditions and the following
|
13 | a9b3f29d | Antony Chazapis | # disclaimer in the documentation and/or other materials
|
14 | a9b3f29d | Antony Chazapis | # provided with the distribution.
|
15 | a9b3f29d | Antony Chazapis | #
|
16 | a9b3f29d | Antony Chazapis | # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
|
17 | a9b3f29d | Antony Chazapis | # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
18 | a9b3f29d | Antony Chazapis | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
19 | a9b3f29d | Antony Chazapis | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
|
20 | a9b3f29d | Antony Chazapis | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
21 | a9b3f29d | Antony Chazapis | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
22 | a9b3f29d | Antony Chazapis | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
23 | a9b3f29d | Antony Chazapis | # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
24 | a9b3f29d | Antony Chazapis | # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
25 | a9b3f29d | Antony Chazapis | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
26 | a9b3f29d | Antony Chazapis | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27 | a9b3f29d | Antony Chazapis | # POSSIBILITY OF SUCH DAMAGE.
|
28 | a9b3f29d | Antony Chazapis | #
|
29 | a9b3f29d | Antony Chazapis | # The views and conclusions contained in the software and
|
30 | a9b3f29d | Antony Chazapis | # documentation are those of the authors and should not be
|
31 | a9b3f29d | Antony Chazapis | # interpreted as representing official policies, either expressed
|
32 | a9b3f29d | Antony Chazapis | # or implied, of GRNET S.A.
|
33 | a9b3f29d | Antony Chazapis | |
34 | a9b3f29d | Antony Chazapis | from os import makedirs |
35 | a9b3f29d | Antony Chazapis | from os.path import isdir, realpath, exists, join |
36 | a9b3f29d | Antony Chazapis | from hashlib import new as newhasher |
37 | a9b3f29d | Antony Chazapis | from binascii import hexlify |
38 | a9b3f29d | Antony Chazapis | |
39 | 6f4bce7b | Antony Chazapis | from context_file import ContextFile, file_sync_read_chunks |
40 | a9b3f29d | Antony Chazapis | |
41 | a9b3f29d | Antony Chazapis | |
class Blocker(object):
    """Content-addressed block store kept on the local filesystem.

    Every block lives in a file named after the hex digest of its hash,
    below a three-level directory fan-out built from the first six hex
    digits of that name: ``<blockpath>/aa/bb/cc/aabbcc...``.

    Required constructor parameters: blocksize, blockpath, hashtype.
    """

    blocksize = None
    blockpath = None
    hashtype = None

    def __init__(self, **params):
        """Validate the parameters and prepare the storage directory.

        Keyword parameters (all required):
            blocksize -- size of a block, stored as given.
            blockpath -- directory holding the blocks; it is resolved with
                         realpath() and created when it does not exist.
            hashtype  -- name of a hashlib algorithm.

        Raises:
            KeyError   -- a required parameter is missing.
            ValueError -- blockpath exists but is not a directory, or
                          hashtype is not available from hashlib.
        """
        blocksize = params['blocksize']
        blockpath = realpath(params['blockpath'])
        if not isdir(blockpath):
            if exists(blockpath):
                raise ValueError("Variable blockpath '%s' is not a directory" % (blockpath,))
            makedirs(blockpath)

        hashtype = params['hashtype']
        try:
            hasher = newhasher(hashtype)
        except ValueError:
            msg = "Variable hashtype '%s' is not available from hashlib"
            raise ValueError(msg % (hashtype,))

        # Bytes literal: hashlib rejects text input on Python 3, while on
        # Python 2 b"" is the very same object type as "".
        hasher.update(b"")
        emptyhash = hasher.digest()

        self.blocksize = blocksize
        self.blockpath = blockpath
        self.hashtype = hashtype
        self.hashlen = len(emptyhash)
        self.emptyhash = emptyhash

    def _block_path(self, blkhash):
        """Return (directory, full file path) for the block with this hash."""
        filename = hexlify(blkhash)
        if not isinstance(filename, str):
            # Python 3: hexlify() yields bytes, which cannot be joined
            # with the text blockpath.
            filename = filename.decode('ascii')
        dirname = join(self.blockpath, filename[0:2], filename[2:4], filename[4:6])
        return dirname, join(dirname, filename)

    def _get_rear_block(self, blkhash, create=0):
        """Return a ContextFile for the block file of ``blkhash``.

        The fan-out directory is created when missing, whatever the value
        of ``create``; ``create`` itself is handed to ContextFile untouched.
        """
        dirname, name = self._block_path(blkhash)
        if not exists(dirname):
            makedirs(dirname)
        return ContextFile(name, create)

    def _check_rear_block(self, blkhash):
        """Return True when a file for ``blkhash`` exists in the store."""
        return exists(self._block_path(blkhash)[1])

    def block_hash(self, data):
        """Hash a block of data.

        Trailing zero bytes are stripped before hashing, so blocks that
        differ only in zero padding at the end share one hash.
        """
        hasher = newhasher(self.hashtype)
        hasher.update(data.rstrip(b'\x00'))
        return hasher.digest()

    def block_ping(self, hashes):
        """Check hashes for existence and
           return those missing from block storage.

        Each missing hash is reported once, in order of first appearance.
        """
        notfound = []
        reported = set()  # O(1) duplicate check instead of scanning the list

        for h in hashes:
            if h not in reported and not self._check_rear_block(h):
                reported.add(h)
                notfound.append(h)

        return notfound

    def block_retr(self, hashes):
        """Retrieve blocks from storage by their hashes.

        The hash of the empty block maps to an empty block without touching
        the disk. Retrieval stops at the first hash whose block cannot be
        opened or read, so the returned list may be shorter than ``hashes``.
        """
        blocksize = self.blocksize
        blocks = []
        append = blocks.append

        for h in hashes:
            if h == self.emptyhash:
                append(b'')
                continue
            with self._get_rear_block(h, 0) as rbl:
                if not rbl:
                    break
                # There should be just one block in the file. Defaulting
                # to None keeps an unreadable file from silently reusing
                # the block fetched for the previous hash.
                block = next(iter(rbl.sync_read_chunks(blocksize, 1, 0)), None)
            if not block:
                break
            append(block)

        return blocks

    def block_stor(self, blocklist):
        """Store a bunch of blocks and return (hashes, missing).
           Hashes is a list of the hashes of the blocks,
           missing is a list of indices in that list indicating
           which blocks were missing from the store.
        """
        block_hash = self.block_hash
        hashlist = [block_hash(b) for b in blocklist]
        missing = [i for i, h in enumerate(hashlist) if not self._check_rear_block(h)]
        for i in missing:
            with self._get_rear_block(hashlist[i], 1) as rbl:
                rbl.sync_write(blocklist[i])  # XXX: verify?

        return hashlist, missing

    @staticmethod
    def _patch_block(block, offdata, blocksize):
        """Return ``block`` with the (offset, data) patches spliced in.

        Patches are applied in the order given and are expected to be
        sorted by offset and non-overlapping. A patch with empty data ends
        the new block at the current position (its offset is not used) and
        drops the rest of the old block. Patching also stops once the
        position reaches ``blocksize``.
        """
        pieces = []
        size = 0  # read position in the old block == end of the last patch
        truncated = False
        for off, data in offdata:
            if not data:
                truncated = True
                break
            # Old content between the previous patch and this one. The
            # cursor has to advance with every patch; otherwise each later
            # patch would copy the old block again from offset 0.
            pieces.append(block[size:off])
            pieces.append(data)
            size = off + len(data)
            if size >= blocksize:
                break

        if not truncated:
            pieces.append(block[size:])

        # block[:0] is an empty value of the same type as the block, so the
        # join works for both Python 2 str and Python 3 bytes.
        return block[:0].join(pieces)

    def block_delta(self, blkhash, offdata=()):
        """Construct and store a new block from a given block
           and a list of (offset, data) 'patches'. Return:
           (the hash of the new block, 1 if the new block had to be
           written because it was not in the store yet, else 0)

        Returns (None, None) when there are no patches or the base block
        cannot be retrieved.
        """
        if not offdata:
            return None, None

        retrieved = self.block_retr((blkhash,))
        if not retrieved:
            return None, None

        newblock = self._patch_block(retrieved[0], offdata, self.blocksize)
        h, a = self.block_stor((newblock,))
        return h[0], 1 if a else 0

    def block_hash_file(self, openfile):
        """Return the list of hashes (hashes map)
           for the blocks in a buffered file.
           Helper method, does not affect store.
        """
        block_hash = self.block_hash
        chunks = file_sync_read_chunks(openfile, self.blocksize, 1, 0)
        return [block_hash(block) for block in chunks]

    def block_stor_file(self, openfile):
        """Read blocks from buffered file object and store them. Return:
           (bytes read, list of hashes, list of 'missing' markers)

        Blocks are stored one at a time, so every marker is the index of
        the block inside its own one-block batch, i.e. 0; the list holds
        one entry per block that was not yet in the store.
        NOTE(review): earlier documentation described the third element as
        the missing *hashes*; confirm what callers expect before changing
        the values.
        """
        blocksize = self.blocksize
        block_stor = self.block_stor
        hashlist = []
        storedlist = []
        lastsize = 0

        for block in file_sync_read_chunks(openfile, blocksize, 1, 0):
            hl, sl = block_stor((block,))
            hashlist.extend(hl)
            storedlist.extend(sl)
            lastsize = len(block)

        # Every block but the last is counted as a full blocksize.
        size = (len(hashlist) - 1) * blocksize + lastsize if hashlist else 0
        return size, hashlist, storedlist