Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-tools / pithos / tools / lib / hashmap.py @ 6e147ecc

History | View | Annotate | Download (2.8 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
import hashlib
35

    
36
from binascii import hexlify
37

    
38

    
39
def file_read_iterator(fp, size=1024):
    """Yield successive chunks read from *fp* until it is exhausted.

    Each chunk is whatever ``fp.read(size)`` returns; iteration stops at
    the first empty (falsy) result, which is not yielded.
    """
    chunk = fp.read(size)
    while chunk:
        yield chunk
        chunk = fp.read(size)
45

    
46
class HashMap(list):
    """List of per-block digests that can be folded into one top hash.

    The list items are the raw digests of consecutive blocks of a file
    (see ``load``).  ``hash`` combines them pairwise, level by level,
    into a single digest.

    All data handled here is binary; byte-string literals are used
    throughout so the class behaves the same on Python 2 (where ``str``
    is bytes) and Python 3.
    """

    def __init__(self, blocksize, blockhash):
        """Create an empty map.

        blocksize -- number of bytes requested per block when loading.
        blockhash -- algorithm name passed to ``hashlib.new``.
        """
        super(HashMap, self).__init__()
        self.blocksize = blocksize
        self.blockhash = blockhash
        # Defined up front so the attribute exists before load() is called.
        self.size = 0

    def _hash_raw(self, v):
        """Return the raw digest of the byte string *v*."""
        h = hashlib.new(self.blockhash)
        h.update(v)
        return h.digest()

    def _hash_block(self, v):
        """Return the digest of block *v* ignoring trailing NUL bytes."""
        return self._hash_raw(v.rstrip(b'\x00'))

    def hash(self):
        """Return the top hash of the stored block digests.

        An empty map hashes to the digest of the empty byte string and a
        single-entry map to that entry itself.  Otherwise the list is
        padded with all-zero entries up to the next power of two and
        adjacent pairs are hashed repeatedly until one digest remains.
        """
        if not self:
            return self._hash_raw(b'')
        if len(self) == 1:
            return self[0]

        level = list(self)
        # Smallest power of two that can hold every entry.
        width = 2
        while width < len(level):
            width *= 2
        # Padding entries are zero-filled and as long as a real digest.
        level += [b'\x00' * len(level[0])] * (width - len(level))
        while len(level) > 1:
            level = [self._hash_raw(level[i] + level[i + 1])
                     for i in range(0, len(level), 2)]
        return level[0]

    def load(self, fp):
        """Append the digest of every block read from *fp*.

        *fp* must yield byte strings from ``read`` (i.e. be opened in
        binary mode).  ``self.size`` is reset and then set to the total
        number of bytes read.  Existing entries are not cleared.
        """
        self.size = 0
        for block in file_read_iterator(fp, self.blocksize):
            self.append(self._hash_block(block))
            self.size += len(block)
81

    
82

    
83
def merkle(path, blocksize=4194304, blockhash='sha256'):
    """Return the hex-encoded top hash of the file at *path*.

    The file is split into blocks of *blocksize* bytes, each block is
    hashed with the *blockhash* algorithm, and the block digests are
    combined by ``HashMap.hash``.  The result is what ``hexlify``
    returns for that digest.
    """
    hashes = HashMap(blocksize, blockhash)
    # Binary mode keeps the bytes exactly as stored on disk (text mode
    # would translate newlines on some platforms and decode on Python 3);
    # the with-block guarantees the file is closed even on error.
    with open(path, 'rb') as fp:
        hashes.load(fp)
    return hexlify(hashes.hash())