root / snf-pithos-backend / pithos / backends / lib / hashfiler / fileblocker.py @ 4a7b190f
History | View | Annotate | Download (7.1 kB)
1 | c30635bf | Filippos Giannakos | # Copyright 2011-2012 GRNET S.A. All rights reserved.
|
---|---|---|---|
2 | 29148653 | Sofia Papagiannaki | #
|
3 | c30635bf | Filippos Giannakos | # Redistribution and use in source and binary forms, with or
|
4 | c30635bf | Filippos Giannakos | # without modification, are permitted provided that the following
|
5 | c30635bf | Filippos Giannakos | # conditions are met:
|
6 | 29148653 | Sofia Papagiannaki | #
|
7 | c30635bf | Filippos Giannakos | # 1. Redistributions of source code must retain the above
|
8 | c30635bf | Filippos Giannakos | # copyright notice, this list of conditions and the following
|
9 | c30635bf | Filippos Giannakos | # disclaimer.
|
10 | 29148653 | Sofia Papagiannaki | #
|
11 | c30635bf | Filippos Giannakos | # 2. Redistributions in binary form must reproduce the above
|
12 | c30635bf | Filippos Giannakos | # copyright notice, this list of conditions and the following
|
13 | c30635bf | Filippos Giannakos | # disclaimer in the documentation and/or other materials
|
14 | c30635bf | Filippos Giannakos | # provided with the distribution.
|
15 | 29148653 | Sofia Papagiannaki | #
|
16 | c30635bf | Filippos Giannakos | # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
|
17 | c30635bf | Filippos Giannakos | # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
18 | c30635bf | Filippos Giannakos | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
19 | c30635bf | Filippos Giannakos | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
|
20 | c30635bf | Filippos Giannakos | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
21 | c30635bf | Filippos Giannakos | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
22 | c30635bf | Filippos Giannakos | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
23 | c30635bf | Filippos Giannakos | # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
24 | c30635bf | Filippos Giannakos | # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
25 | c30635bf | Filippos Giannakos | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
26 | c30635bf | Filippos Giannakos | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27 | c30635bf | Filippos Giannakos | # POSSIBILITY OF SUCH DAMAGE.
|
28 | 29148653 | Sofia Papagiannaki | #
|
29 | c30635bf | Filippos Giannakos | # The views and conclusions contained in the software and
|
30 | c30635bf | Filippos Giannakos | # documentation are those of the authors and should not be
|
31 | c30635bf | Filippos Giannakos | # interpreted as representing official policies, either expressed
|
32 | c30635bf | Filippos Giannakos | # or implied, of GRNET S.A.
|
33 | c30635bf | Filippos Giannakos | |
34 | c30635bf | Filippos Giannakos | from os import makedirs |
35 | c30635bf | Filippos Giannakos | from os.path import isdir, realpath, exists, join |
36 | c30635bf | Filippos Giannakos | from hashlib import new as newhasher |
37 | c30635bf | Filippos Giannakos | from binascii import hexlify |
38 | c30635bf | Filippos Giannakos | |
39 | c30635bf | Filippos Giannakos | from context_file import ContextFile, file_sync_read_chunks |
40 | c30635bf | Filippos Giannakos | |
41 | c30635bf | Filippos Giannakos | |
class FileBlocker(object):
    """Content-addressed block store kept in plain files.

    Every block lives in its own file.  The file is named after the hex
    form of the block's hash and is placed below ``blockpath`` in a
    three-level directory fan-out made of the first six hex digits of
    that name (``ab/cd/ef/abcdef...``).

    Required constructor parameters: blocksize, blockpath, hashtype.
    """

    blocksize = None
    blockpath = None
    hashtype = None

    def __init__(self, **params):
        """Set up the store.

        Keyword parameters (all required):
        blocksize -- size in bytes that blocks are padded to.
        blockpath -- directory holding the block files; it is created
                     when it does not exist.
        hashtype  -- name of a hashlib algorithm.

        Raises KeyError when a parameter is missing and ValueError when
        blockpath exists but is not a directory, or when hashtype is not
        known to hashlib.
        """
        blocksize = params['blocksize']
        blockpath = realpath(params['blockpath'])
        if not isdir(blockpath):
            if exists(blockpath):
                raise ValueError("Variable blockpath '%s' is not a directory" %
                                 (blockpath,))
            makedirs(blockpath)

        hashtype = params['hashtype']
        try:
            hasher = newhasher(hashtype)
        except ValueError:
            msg = "Variable hashtype '%s' is not available from hashlib"
            raise ValueError(msg % (hashtype,))

        # Byte literal: identical to "" on Python 2, and accepted by
        # hashlib where str is unicode.
        hasher.update(b"")
        emptyhash = hasher.digest()

        self.blocksize = blocksize
        self.blockpath = blockpath
        self.hashtype = hashtype
        self.hashlen = len(emptyhash)
        self.emptyhash = emptyhash

    def _pad(self, block):
        """Return block extended with NUL bytes up to blocksize."""
        return block + (b'\x00' * (self.blocksize - len(block)))

    def _rear_block_path(self, blkhash):
        """Return (directory, file name) of the file that holds blkhash."""
        filename = hexlify(blkhash)
        if not isinstance(filename, str):
            # hexlify() returns bytes where str is unicode, while the
            # path is joined from str components.
            filename = filename.decode('ascii')
        dirpath = join(self.blockpath,
                       filename[0:2], filename[2:4], filename[4:6])
        return dirpath, join(dirpath, filename)

    def _get_rear_block(self, blkhash, create=0):
        """Return a ContextFile for the file that holds blkhash.

        The enclosing directories are created when absent; ``create``
        is handed to ContextFile unchanged.
        """
        dirpath, name = self._rear_block_path(blkhash)
        if not exists(dirpath):
            try:
                makedirs(dirpath)
            except OSError:
                # Somebody else may have created the directory between
                # the check and makedirs(); that is not an error.
                if not isdir(dirpath):
                    raise
        return ContextFile(name, create)

    def _check_rear_block(self, blkhash):
        """Return True when a file for blkhash exists in the store."""
        return exists(self._rear_block_path(blkhash)[1])

    def block_hash(self, data):
        """Hash a block of data.

        Trailing NUL bytes are stripped before hashing, so a block and
        its padded form have the same hash.
        """
        hasher = newhasher(self.hashtype)
        hasher.update(data.rstrip(b'\x00'))
        return hasher.digest()

    def block_ping(self, hashes):
        """Check hashes for existence and
           return those missing from block storage.

        Each missing hash is reported once, in order of first appearance.
        """
        notfound = []
        # Set mirror of notfound, so the duplicate test does not rescan
        # the list for every hash.
        reported = set()

        for h in hashes:
            if h not in reported and not self._check_rear_block(h):
                reported.add(h)
                notfound.append(h)

        return notfound

    def block_retr(self, hashes):
        """Retrieve blocks from storage by their hashes.

        Returns the list of blocks, each padded to blocksize.  Retrieval
        stops at the first hash whose block cannot be read, so the list
        may be shorter than ``hashes``.
        """
        blocksize = self.blocksize
        blocks = []
        append = blocks.append

        for h in hashes:
            if h == self.emptyhash:
                # The empty block is never stored; synthesize it.
                append(self._pad(b''))
                continue

            # Reset for every hash: otherwise a rear block that yields
            # nothing would reuse the data read for the previous hash.
            block = None
            with self._get_rear_block(h, 0) as rbl:
                if not rbl:
                    break
                for block in rbl.sync_read_chunks(blocksize, 1, 0):
                    break  # there should be just one block there
            if not block:
                break
            append(self._pad(block))

        return blocks

    def block_stor(self, blocklist):
        """Store a bunch of blocks and return (hashes, missing).
           Hashes is a list of the hashes of the blocks,
           missing is a list of indices in that list indicating
           which blocks were missing from the store.

        Only the missing blocks are written.
        """
        block_hash = self.block_hash
        hashlist = [block_hash(b) for b in blocklist]
        missing = [i for i, h in enumerate(hashlist)
                   if not self._check_rear_block(h)]
        for i in missing:
            with self._get_rear_block(hashlist[i], 1) as rbl:
                rbl.sync_write(blocklist[i])  # XXX: verify?

        return hashlist, missing

    def block_delta(self, blkhash, offset, data):
        """Construct and store a new block from a given block
           and a data 'patch' applied at offset. Return:
           (the hash of the new block, 1 if the new block had to be
           written to the store or 0 if it was already there)

        Returns (None, None) when offset is not below blocksize, when
        data is empty, or when the given block cannot be retrieved.
        Data reaching past blocksize is cut off.
        """
        blocksize = self.blocksize
        if offset >= blocksize or not data:
            return None, None

        block = self.block_retr((blkhash,))
        if not block:
            return None, None

        block = block[0]
        newblock = block[:offset] + data
        if len(newblock) > blocksize:
            newblock = newblock[:blocksize]
        elif len(newblock) < blocksize:
            # Keep the tail of the old block after the patched region.
            newblock += block[len(newblock):]

        h, a = self.block_stor((newblock,))
        return h[0], 1 if a else 0

    def block_hash_file(self, openfile):
        """Return the list of hashes (hashes map)
           for the blocks in a buffered file.
           Helper method, does not affect store.
        """
        hashes = []
        append = hashes.append
        block_hash = self.block_hash

        for block in file_sync_read_chunks(openfile, self.blocksize, 1, 0):
            append(block_hash(block))

        return hashes

    def block_stor_file(self, openfile):
        """Read blocks from buffered file object and store them. Return:
           (bytes read, list of hashes, list of hashes that were missing)

        The third element is the concatenation of the ``missing`` lists
        returned by block_stor() for each block read.
        """
        blocksize = self.blocksize
        block_stor = self.block_stor
        hashlist = []
        hextend = hashlist.extend
        storedlist = []
        sextend = storedlist.extend
        lastsize = 0

        for block in file_sync_read_chunks(openfile, blocksize, 1, 0):
            hl, sl = block_stor((block,))
            hextend(hl)
            sextend(sl)
            lastsize = len(block)

        # Every block but the last one is counted as a full blocksize.
        size = (len(hashlist) - 1) * blocksize + lastsize if hashlist else 0
        return size, hashlist, storedlist