Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-tools / pithos / tools / lib / hashmap.py @ db117cac

History | View | Annotate | Download (3.1 kB)

1 2e662088 Antony Chazapis
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2 f390685d Sofia Papagiannaki
# 
3 f390685d Sofia Papagiannaki
# Redistribution and use in source and binary forms, with or
4 f390685d Sofia Papagiannaki
# without modification, are permitted provided that the following
5 f390685d Sofia Papagiannaki
# conditions are met:
6 f390685d Sofia Papagiannaki
# 
7 f390685d Sofia Papagiannaki
#   1. Redistributions of source code must retain the above
8 f390685d Sofia Papagiannaki
#      copyright notice, this list of conditions and the following
9 f390685d Sofia Papagiannaki
#      disclaimer.
10 f390685d Sofia Papagiannaki
# 
11 f390685d Sofia Papagiannaki
#   2. Redistributions in binary form must reproduce the above
12 f390685d Sofia Papagiannaki
#      copyright notice, this list of conditions and the following
13 f390685d Sofia Papagiannaki
#      disclaimer in the documentation and/or other materials
14 f390685d Sofia Papagiannaki
#      provided with the distribution.
15 f390685d Sofia Papagiannaki
# 
16 f390685d Sofia Papagiannaki
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17 f390685d Sofia Papagiannaki
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 f390685d Sofia Papagiannaki
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 f390685d Sofia Papagiannaki
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20 f390685d Sofia Papagiannaki
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 f390685d Sofia Papagiannaki
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 f390685d Sofia Papagiannaki
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 f390685d Sofia Papagiannaki
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24 f390685d Sofia Papagiannaki
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 f390685d Sofia Papagiannaki
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 f390685d Sofia Papagiannaki
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 f390685d Sofia Papagiannaki
# POSSIBILITY OF SUCH DAMAGE.
28 f390685d Sofia Papagiannaki
# 
29 f390685d Sofia Papagiannaki
# The views and conclusions contained in the software and
30 f390685d Sofia Papagiannaki
# documentation are those of the authors and should not be
31 f390685d Sofia Papagiannaki
# interpreted as representing official policies, either expressed
32 f390685d Sofia Papagiannaki
# or implied, of GRNET S.A.
33 f390685d Sofia Papagiannaki
34 f390685d Sofia Papagiannaki
import hashlib
35 6f6cec5a Sofia Papagiannaki
import os
36 f390685d Sofia Papagiannaki
37 8d5abda5 Giorgos Verigakis
from binascii import hexlify
38 8d5abda5 Giorgos Verigakis
39 6f6cec5a Sofia Papagiannaki
from progress.bar import IncrementalBar
40 8d5abda5 Giorgos Verigakis
41 f390685d Sofia Papagiannaki
def file_read_iterator(fp, size=1024):
    """Yield successive chunks read from ``fp`` until it is exhausted.

    ``fp`` is any object with a ``read(size)`` method.  Each chunk is
    whatever ``fp.read(size)`` returns; iteration stops at the first
    empty (falsy) result, which is never yielded.
    """
    chunk = fp.read(size)
    while chunk:
        yield chunk
        chunk = fp.read(size)
47 f390685d Sofia Papagiannaki
48 f390685d Sofia Papagiannaki
class HashMap(list):
    """List of per-block digests that can be reduced to a single top hash.

    Each element is the raw digest of one ``blocksize``-sized block,
    computed with the ``hashlib`` algorithm named by ``blockhash``.
    ``size`` holds the number of bytes consumed by the last ``load()``.
    """

    def __init__(self, blocksize, blockhash):
        super(HashMap, self).__init__()
        self.blocksize = blocksize
        self.blockhash = blockhash
        # Defined up front so the attribute exists before load() is called.
        self.size = 0

    def _hash_raw(self, v):
        """Return the raw digest of the byte string ``v``."""
        h = hashlib.new(self.blockhash)
        h.update(v)
        return h.digest()

    def _hash_block(self, v):
        """Return the digest of block ``v`` with trailing NUL bytes removed."""
        # Bytes literal: identical to '\x00' on Python 2, and required for
        # bytes.rstrip() on Python 3.
        return self._hash_raw(v.rstrip(b'\x00'))

    def hash(self):
        """Return the top-level digest of the stored block digests.

        An empty map hashes to the digest of the empty byte string and a
        single-entry map to that entry itself.  Otherwise the entries are
        padded with all-zero digests up to the next power of two and then
        hashed pairwise, level by level, until one digest remains.
        """
        if not self:
            return self._hash_raw(b'')
        if len(self) == 1:
            return self[0]

        level = list(self)
        width = 2
        while width < len(level):
            width *= 2
        # Pad so that every level of the tree has an even number of nodes.
        level += [b'\x00' * len(level[0])] * (width - len(level))
        while len(level) > 1:
            level = [self._hash_raw(level[i] + level[i + 1])
                     for i in range(0, len(level), 2)]
        return level[0]

    def load(self, fp):
        """Read ``fp`` block by block and store each block's digest.

        ``fp`` must expose ``fileno()`` (used to size the progress bar)
        and ``read()``.  Any digests from an earlier ``load()`` are
        discarded so the entries always match ``size``.
        """
        # size is reset below; drop stale digests too so a second load()
        # does not leave the list and size describing different data.
        del self[:]
        self.size = 0
        file_size = os.fstat(fp.fileno()).st_size
        # Ceiling division; evaluates to 0 for an empty file.
        nblocks = 1 + (file_size - 1) // self.blocksize
        bar = IncrementalBar('Computing', max=nblocks)
        bar.suffix = '%(percent).1f%% - %(eta)ds'
        for block in bar.iter(file_read_iterator(fp, self.blocksize)):
            self.append(self._hash_block(block))
            self.size += len(block)
87 8d5abda5 Giorgos Verigakis
88 8d5abda5 Giorgos Verigakis
89 8d5abda5 Giorgos Verigakis
def merkle(path, blocksize=4194304, blockhash='sha256'):
    """Return the hex-encoded top hash of the file at ``path``.

    The file is split into ``blocksize``-byte blocks, each hashed with
    the ``hashlib`` algorithm named by ``blockhash``, and the block
    digests are reduced to a single digest by ``HashMap.hash()``.
    """
    hashes = HashMap(blocksize, blockhash)
    # Binary mode keeps the bytes exactly as stored (no newline
    # translation or decoding); the with-block closes the handle even if
    # hashing fails.
    with open(path, 'rb') as fp:
        hashes.load(fp)
    return hexlify(hashes.hash())