root / snf-pithos-backend / pithos / backends / lib / hashfiler / fileblocker.py @ c30635bf
History | View | Annotate | Download (7 kB)
1 | c30635bf | Filippos Giannakos | # Copyright 2011-2012 GRNET S.A. All rights reserved.
|
---|---|---|---|
2 | c30635bf | Filippos Giannakos | #
|
3 | c30635bf | Filippos Giannakos | # Redistribution and use in source and binary forms, with or
|
4 | c30635bf | Filippos Giannakos | # without modification, are permitted provided that the following
|
5 | c30635bf | Filippos Giannakos | # conditions are met:
|
6 | c30635bf | Filippos Giannakos | #
|
7 | c30635bf | Filippos Giannakos | # 1. Redistributions of source code must retain the above
|
8 | c30635bf | Filippos Giannakos | # copyright notice, this list of conditions and the following
|
9 | c30635bf | Filippos Giannakos | # disclaimer.
|
10 | c30635bf | Filippos Giannakos | #
|
11 | c30635bf | Filippos Giannakos | # 2. Redistributions in binary form must reproduce the above
|
12 | c30635bf | Filippos Giannakos | # copyright notice, this list of conditions and the following
|
13 | c30635bf | Filippos Giannakos | # disclaimer in the documentation and/or other materials
|
14 | c30635bf | Filippos Giannakos | # provided with the distribution.
|
15 | c30635bf | Filippos Giannakos | #
|
16 | c30635bf | Filippos Giannakos | # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
|
17 | c30635bf | Filippos Giannakos | # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
18 | c30635bf | Filippos Giannakos | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
19 | c30635bf | Filippos Giannakos | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
|
20 | c30635bf | Filippos Giannakos | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
21 | c30635bf | Filippos Giannakos | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
22 | c30635bf | Filippos Giannakos | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
23 | c30635bf | Filippos Giannakos | # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
24 | c30635bf | Filippos Giannakos | # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
25 | c30635bf | Filippos Giannakos | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
26 | c30635bf | Filippos Giannakos | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27 | c30635bf | Filippos Giannakos | # POSSIBILITY OF SUCH DAMAGE.
|
28 | c30635bf | Filippos Giannakos | #
|
29 | c30635bf | Filippos Giannakos | # The views and conclusions contained in the software and
|
30 | c30635bf | Filippos Giannakos | # documentation are those of the authors and should not be
|
31 | c30635bf | Filippos Giannakos | # interpreted as representing official policies, either expressed
|
32 | c30635bf | Filippos Giannakos | # or implied, of GRNET S.A.
|
33 | c30635bf | Filippos Giannakos | |
34 | c30635bf | Filippos Giannakos | from os import makedirs |
35 | c30635bf | Filippos Giannakos | from os.path import isdir, realpath, exists, join |
36 | c30635bf | Filippos Giannakos | from hashlib import new as newhasher |
37 | c30635bf | Filippos Giannakos | from binascii import hexlify |
38 | c30635bf | Filippos Giannakos | |
39 | c30635bf | Filippos Giannakos | from context_file import ContextFile, file_sync_read_chunks |
40 | c30635bf | Filippos Giannakos | |
41 | c30635bf | Filippos Giannakos | |
class FileBlocker(object):
    """Content-addressed block store kept in a directory tree.

    Every block is stored in a file named after the hex digest of its
    content, under ``blockpath`` in a three-level directory fan-out built
    from the first six hex digits of that name.  Trailing NUL bytes are
    ignored when hashing (see ``block_hash``).

    Required constructor parameters: blocksize, blockpath, hashtype.
    """

    blocksize = None
    blockpath = None
    hashtype = None

    def __init__(self, **params):
        """Validate the parameters and prepare the storage directory.

        Keyword parameters (all required; a missing one raises KeyError):
            blocksize -- length that blocks are padded / truncated to.
            blockpath -- directory holding the blocks; created if absent.
            hashtype  -- algorithm name accepted by ``hashlib.new``.

        Raises ValueError when blockpath exists but is not a directory,
        or when hashtype is not known to hashlib.
        """
        blocksize = params['blocksize']
        blockpath = realpath(params['blockpath'])
        if not isdir(blockpath):
            if exists(blockpath):
                raise ValueError("Variable blockpath '%s' is not a directory" % (blockpath,))
            self._ensure_dir(blockpath)

        hashtype = params['hashtype']
        try:
            hasher = newhasher(hashtype)
        except ValueError:
            msg = "Variable hashtype '%s' is not available from hashlib"
            raise ValueError(msg % (hashtype,))

        # Digest of zero bytes of input: the hash of the all-NUL block.
        hasher.update(b"")
        emptyhash = hasher.digest()

        self.blocksize = blocksize
        self.blockpath = blockpath
        self.hashtype = hashtype
        self.hashlen = len(emptyhash)
        self.emptyhash = emptyhash

    @staticmethod
    def _ensure_dir(path):
        """Create directory `path` unless something already exists there.

        Tolerates another process or thread creating the same path between
        the existence check and the makedirs() call.
        """
        if exists(path):
            return
        try:
            makedirs(path)
        except OSError:
            # Lost a creation race: fine as long as the path is there now.
            if not exists(path):
                raise

    def _block_path(self, blkhash):
        """Return (directory, full file name) where `blkhash` is stored."""
        filename = hexlify(blkhash)
        if not isinstance(filename, str):
            # hexlify() returns bytes on Python 3; path parts must be text.
            filename = filename.decode('ascii')
        directory = join(self.blockpath,
                         filename[0:2], filename[2:4], filename[4:6])
        return directory, join(directory, filename)

    def _pad(self, block):
        """Return `block` extended with NUL bytes up to blocksize."""
        return block + (b'\x00' * (self.blocksize - len(block)))

    def _get_rear_block(self, blkhash, create=0):
        """Return a ContextFile for the file backing `blkhash`.

        The containing directories are created whenever they are missing,
        regardless of `create`; `create` is passed on to ContextFile as is.
        """
        directory, name = self._block_path(blkhash)
        self._ensure_dir(directory)
        return ContextFile(name, create)

    def _check_rear_block(self, blkhash):
        """Return True if a file for `blkhash` exists in the store."""
        return exists(self._block_path(blkhash)[1])

    def block_hash(self, data):
        """Hash a block of data, ignoring any trailing NUL bytes."""
        hasher = newhasher(self.hashtype)
        hasher.update(data.rstrip(b'\x00'))
        return hasher.digest()

    def block_ping(self, hashes):
        """Check hashes for existence and
           return those missing from block storage.

        Each missing hash is reported once, in order of first appearance.
        """
        notfound = []
        reported = set()  # O(1) duplicate test instead of scanning the list

        for h in hashes:
            if h not in reported and not self._check_rear_block(h):
                reported.add(h)
                notfound.append(h)

        return notfound

    def block_retr(self, hashes):
        """Retrieve blocks from storage by their hashes.

        Returns the blocks, each padded to blocksize, in the order asked.
        Retrieval stops at the first hash that cannot be read, so the
        result may be shorter than `hashes`.
        """
        blocksize = self.blocksize
        blocks = []
        append = blocks.append

        for h in hashes:
            if h == self.emptyhash:
                # The all-NUL block is synthesized, never read from disk.
                append(self._pad(b''))
                continue
            # Reset for every hash: an empty file yields no chunk and must
            # not reuse the block read for the previous hash.
            block = None
            with self._get_rear_block(h, 0) as rbl:
                if not rbl:
                    break
                for block in rbl.sync_read_chunks(blocksize, 1, 0):
                    break  # there should be just one block there
            if not block:
                break
            append(self._pad(block))

        return blocks

    def block_stor(self, blocklist):
        """Store a bunch of blocks and return (hashes, missing).
           Hashes is a list of the hashes of the blocks,
           missing is a list of indices in that list indicating
           which blocks were missing from the store.

        Presence is checked for the whole list before anything is written,
        so a new block given twice in one call is listed and written twice.
        """
        block_hash = self.block_hash
        hashlist = [block_hash(b) for b in blocklist]
        missing = [i for i, h in enumerate(hashlist)
                   if not self._check_rear_block(h)]
        for i in missing:
            with self._get_rear_block(hashlist[i], 1) as rbl:
                rbl.sync_write(blocklist[i])  # XXX: verify?

        return hashlist, missing

    def block_delta(self, blkhash, offset, data):
        """Construct and store a new block from a given block
           and a data 'patch' applied at offset. Return:
           (the hash of the new block, 1 if the new block was missing from
           the store and has just been written, else 0)

        Returns (None, None) when offset is not below blocksize, when data
        is empty, or when the original block cannot be retrieved.
        """
        blocksize = self.blocksize
        if offset >= blocksize or not data:
            return None, None

        block = self.block_retr((blkhash,))
        if not block:
            return None, None

        block = block[0]
        # Overlay the patch, cut at blocksize, then restore the untouched
        # tail of the original block (empty if the patch reached the end).
        newblock = (block[:offset] + data)[:blocksize]
        newblock += block[len(newblock):]

        h, a = self.block_stor((newblock,))
        return h[0], 1 if a else 0

    def block_hash_file(self, openfile):
        """Return the list of hashes (hashes map)
           for the blocks in a buffered file.
           Helper method, does not affect store.
        """
        block_hash = self.block_hash
        # NOTE(review): meaning of the trailing (1, 0) arguments is defined
        # by context_file.file_sync_read_chunks -- confirm there.
        return [block_hash(block) for block in
                file_sync_read_chunks(openfile, self.blocksize, 1, 0)]

    def block_stor_file(self, openfile):
        """Read blocks from buffered file object and store them. Return:
           (bytes read, list of hashes, stored list)

        Blocks are stored one at a time, so the stored list accumulates the
        `missing` result of each single-block block_stor() call: one 0
        entry for every block that had to be written.  Bytes read assumes
        every block but the last is exactly blocksize long.
        """
        blocksize = self.blocksize
        block_stor = self.block_stor
        hashlist = []
        storedlist = []
        lastsize = 0

        for block in file_sync_read_chunks(openfile, blocksize, 1, 0):
            hl, sl = block_stor((block,))
            hashlist.extend(hl)
            storedlist.extend(sl)
            lastsize = len(block)

        size = (len(hashlist) - 1) * blocksize + lastsize if hashlist else 0
        return size, hashlist, storedlist