Statistics
| Branch: | Tag: | Revision:

root / pithos / backends / lib / hashfiler / blocker.py @ f897bea9

History | View | Annotate | Download (7.1 kB)

1 a9b3f29d Antony Chazapis
# Copyright 2011 GRNET S.A. All rights reserved.
2 a9b3f29d Antony Chazapis
# 
3 a9b3f29d Antony Chazapis
# Redistribution and use in source and binary forms, with or
4 a9b3f29d Antony Chazapis
# without modification, are permitted provided that the following
5 a9b3f29d Antony Chazapis
# conditions are met:
6 a9b3f29d Antony Chazapis
# 
7 a9b3f29d Antony Chazapis
#   1. Redistributions of source code must retain the above
8 a9b3f29d Antony Chazapis
#      copyright notice, this list of conditions and the following
9 a9b3f29d Antony Chazapis
#      disclaimer.
10 a9b3f29d Antony Chazapis
# 
11 a9b3f29d Antony Chazapis
#   2. Redistributions in binary form must reproduce the above
12 a9b3f29d Antony Chazapis
#      copyright notice, this list of conditions and the following
13 a9b3f29d Antony Chazapis
#      disclaimer in the documentation and/or other materials
14 a9b3f29d Antony Chazapis
#      provided with the distribution.
15 a9b3f29d Antony Chazapis
# 
16 a9b3f29d Antony Chazapis
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17 a9b3f29d Antony Chazapis
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 a9b3f29d Antony Chazapis
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 a9b3f29d Antony Chazapis
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20 a9b3f29d Antony Chazapis
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 a9b3f29d Antony Chazapis
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 a9b3f29d Antony Chazapis
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 a9b3f29d Antony Chazapis
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24 a9b3f29d Antony Chazapis
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 a9b3f29d Antony Chazapis
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 a9b3f29d Antony Chazapis
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 a9b3f29d Antony Chazapis
# POSSIBILITY OF SUCH DAMAGE.
28 a9b3f29d Antony Chazapis
# 
29 a9b3f29d Antony Chazapis
# The views and conclusions contained in the software and
30 a9b3f29d Antony Chazapis
# documentation are those of the authors and should not be
31 a9b3f29d Antony Chazapis
# interpreted as representing official policies, either expressed
32 a9b3f29d Antony Chazapis
# or implied, of GRNET S.A.
33 a9b3f29d Antony Chazapis
34 a9b3f29d Antony Chazapis
from os import makedirs
35 a9b3f29d Antony Chazapis
from os.path import isdir, realpath, exists, join
36 a9b3f29d Antony Chazapis
from hashlib import new as newhasher
37 a9b3f29d Antony Chazapis
from binascii import hexlify
38 a9b3f29d Antony Chazapis
39 6f4bce7b Antony Chazapis
from context_file import ContextFile, file_sync_read_chunks
40 a9b3f29d Antony Chazapis
41 a9b3f29d Antony Chazapis
42 a9b3f29d Antony Chazapis
class Blocker(object):
    """Content-addressed block store backed by a directory tree.

       Blocks are kept as individual files named after the hex form of
       their hash, fanned out over three directory levels taken from the
       first six hex digits of that name.

       Required constructor parameters: blocksize, blockpath, hashtype.
    """

    blocksize = None
    blockpath = None
    hashtype = None

    def __init__(self, **params):
        """Validate the parameters and prepare the block directory.

           Keyword parameters:
           blocksize -- size passed to the chunk readers when reading blocks
           blockpath -- root directory of the store; created if missing
           hashtype  -- hashlib algorithm name used to hash blocks

           Raises KeyError if a parameter is missing and ValueError if
           blockpath exists but is not a directory, or if hashtype is
           not available from hashlib.
        """
        blocksize = params['blocksize']
        blockpath = realpath(params['blockpath'])
        if not isdir(blockpath):
            if exists(blockpath):
                raise ValueError("Variable blockpath '%s' is not a directory" % (blockpath,))
            self._ensure_dir(blockpath)

        hashtype = params['hashtype']
        try:
            hasher = newhasher(hashtype)
        except ValueError:
            msg = "Variable hashtype '%s' is not available from hashlib"
            raise ValueError(msg % (hashtype,))

        # The digest of a fresh hasher is the hash of the empty block.
        emptyhash = hasher.digest()

        self.blocksize = blocksize
        self.blockpath = blockpath
        self.hashtype = hashtype
        self.hashlen = len(emptyhash)
        self.emptyhash = emptyhash

    @staticmethod
    def _ensure_dir(path):
        """Create directory `path` (and parents) unless it already exists.

           Tolerates another writer creating the directory between the
           check and the makedirs() call; any other OSError propagates.
        """
        if isdir(path):
            return
        try:
            makedirs(path)
        except OSError:
            if not isdir(path):
                raise

    def _block_path(self, blkhash):
        """Return (directory, full file name) for the block with hash blkhash."""
        filename = hexlify(blkhash)
        if not isinstance(filename, str):
            # hexlify() returns bytes on Python 3; path pieces must be text
            # to be joined with self.blockpath.
            filename = filename.decode('ascii')
        directory = join(self.blockpath,
                         filename[0:2], filename[2:4], filename[4:6])
        return directory, join(directory, filename)

    def _get_rear_block(self, blkhash, create=0):
        """Return a ContextFile for the block file of blkhash.

           The fan-out directory is created if it does not exist yet
           (regardless of `create`, as before); `create` is forwarded
           unchanged to ContextFile.
        """
        directory, name = self._block_path(blkhash)
        self._ensure_dir(directory)
        return ContextFile(name, create)

    def _check_rear_block(self, blkhash):
        """Return True if a file for blkhash exists in the store."""
        _, name = self._block_path(blkhash)
        return exists(name)

    def block_hash(self, data):
        """Hash a block of data.

           Trailing NUL bytes are stripped before hashing, so blocks
           that differ only in trailing zero padding hash identically.
        """
        hasher = newhasher(self.hashtype)
        hasher.update(data.rstrip(b'\x00'))
        return hasher.digest()

    def block_ping(self, hashes):
        """Check hashes for existence and
           return those missing from block storage.

           Each missing hash is reported once, in order of first
           appearance.
        """
        notfound = []
        reported = set()  # O(1) duplicate detection for missing hashes

        for h in hashes:
            if h in reported:
                continue
            if not self._check_rear_block(h):
                reported.add(h)
                notfound.append(h)

        return notfound

    def block_retr(self, hashes):
        """Retrieve blocks from storage by their hashes.

           Returns the list of block contents in the order requested.
           Retrieval stops at the first block that cannot be read, so
           the result may be shorter than `hashes`. The empty hash maps
           to an empty block without touching storage.
        """
        blocksize = self.blocksize
        blocks = []
        append = blocks.append

        for h in hashes:
            if h == self.emptyhash:
                append(b'')
                continue
            # Reset per hash so a block file that yields nothing cannot
            # re-append the data read for the previous hash.
            block = None
            with self._get_rear_block(h, 0) as rbl:
                if not rbl:
                    break
                for block in rbl.sync_read_chunks(blocksize, 1, 0):
                    break  # there should be just one block there
            if not block:
                break
            append(block)

        return blocks

    def block_stor(self, blocklist):
        """Store a bunch of blocks and return (hashes, missing).
           Hashes is a list of the hashes of the blocks,
           missing is a list of indices in that list indicating
           which blocks were missing from the store.
        """
        block_hash = self.block_hash
        hashlist = [block_hash(b) for b in blocklist]
        missing = [i for i, h in enumerate(hashlist)
                   if not self._check_rear_block(h)]
        for i in missing:
            with self._get_rear_block(hashlist[i], 1) as rbl:
                rbl.sync_write(blocklist[i])  # XXX: verify?

        return hashlist, missing

    @staticmethod
    def _patch_block(block, offdata, blocksize):
        """Apply (offset, data) patches to block and return the new block.

           Patches are applied in order. A patch with empty data ends
           patching and drops the rest of the original block. Patching
           also ends once the patched length reaches blocksize.
        """
        parts = []
        idx = 0        # read position in the original block
        size = 0       # length of the patched block so far
        truncate = False
        for off, data in offdata:
            if not data:
                truncate = True
                break
            parts.append(block[idx:off])
            parts.append(data)
            size += off - idx + len(data)
            if size >= blocksize:
                break
            # Continue copying the original right after this patch.
            idx = size

        if not truncate:
            parts.append(block[size:])

        # NOTE(review): the result is not clipped to blocksize and offsets
        # past the end of the original block are not padded -- confirm
        # whether callers guarantee in-range patches.
        return b''.join(parts)

    def block_delta(self, blkhash, offdata=()):
        """Construct and store a new block from a given block
           and a list of (offset, data) 'patches'. Return:
           (the hash of the new block, 1 if the new block had to be
           written to the store or 0 if it was already there).

           Returns (None, None) if there are no patches or the
           original block cannot be retrieved.
        """
        if not offdata:
            return None, None

        block = self.block_retr((blkhash,))
        if not block:
            return None, None

        newblock = self._patch_block(block[0], offdata, self.blocksize)
        h, a = self.block_stor((newblock,))
        return h[0], 1 if a else 0

    def block_hash_file(self, openfile):
        """Return the list of hashes (hashes map)
           for the blocks in a buffered file.
           Helper method, does not affect store.
        """
        block_hash = self.block_hash
        return [block_hash(block)
                for block in file_sync_read_chunks(openfile, self.blocksize, 1, 0)]

    def block_stor_file(self, openfile):
        """Read blocks from buffered file object and store them. Return:
           (bytes read, list of hashes, list of 'missing' markers)

           Bytes read is computed as full blocks plus the length of the
           last block read.

           NOTE(review): the third element collects the 'missing' index
           lists returned by block_stor() for each single-block call, so
           it holds one 0 per newly stored block rather than hashes --
           confirm what callers expect before changing it.
        """
        blocksize = self.blocksize
        block_stor = self.block_stor
        hashlist = []
        storedlist = []
        lastsize = 0

        for block in file_sync_read_chunks(openfile, blocksize, 1, 0):
            hl, sl = block_stor((block,))
            hashlist.extend(hl)
            storedlist.extend(sl)
            lastsize = len(block)

        size = (len(hashlist) - 1) * blocksize + lastsize if hashlist else 0
        return size, hashlist, storedlist