Statistics
| Branch: | Tag: | Revision:

root / snf-pithos-tools / pithos / tools / lib / hashmap.py @ 6f6cec5a

History | View | Annotate | Download (3.1 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
# 
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
# 
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
# 
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
# 
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
# 
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
import hashlib
35
import os
36

    
37
from binascii import hexlify
38

    
39
from progress.bar import IncrementalBar
40

    
41
def file_read_iterator(fp, size=1024):
    """Yield successive chunks read from *fp* until it is exhausted.

    Each chunk is whatever ``fp.read(size)`` returns; iteration stops at the
    first empty (falsy) read, so the empty chunk itself is never yielded.

    :param fp: object exposing a ``read(size)`` method.
    :param size: maximum amount requested per read (default 1024).
    """
    chunk = fp.read(size)
    while chunk:
        yield chunk
        chunk = fp.read(size)
47

    
48
class HashMap(list):
    """List of per-block digests with a Merkle-style combined hash.

    Each element is the raw digest of one block of data. ``hash()`` folds the
    elements pairwise into a single top-level digest.

    :param blocksize: number of bytes requested per block when loading a file.
    :param blockhash: algorithm name handed to ``hashlib.new``.
    """

    def __init__(self, blocksize, blockhash):
        super(HashMap, self).__init__()
        self.blocksize = blocksize
        self.blockhash = blockhash
        # Total bytes consumed by the most recent load(); set here so the
        # attribute exists even before any file has been loaded.
        self.size = 0

    def _hash_raw(self, v):
        """Return the raw digest of the bytes *v* using ``self.blockhash``."""
        h = hashlib.new(self.blockhash)
        h.update(v)
        return h.digest()

    def _hash_block(self, v):
        """Return the digest of block *v* with trailing NUL bytes stripped."""
        # Bytes literal: identical to '\x00' on Python 2, and required on
        # Python 3 where blocks read in binary mode are ``bytes``.
        return self._hash_raw(v.rstrip(b'\x00'))

    def hash(self):
        """Return the combined digest of all block digests in this list.

        * no elements: the digest of the empty byte string;
        * one element: that element unchanged;
        * otherwise: the list is padded with zero-filled entries (same length
          as the first digest) up to the next power of two, then adjacent
          pairs are concatenated and hashed repeatedly until one digest
          remains.
        """
        if not self:
            return self._hash_raw(b'')
        if len(self) == 1:
            return self[0]

        level = list(self)
        # Smallest power of two that is >= the number of digests.
        width = 2
        while width < len(level):
            width *= 2
        level += [b'\x00' * len(level[0])] * (width - len(level))
        # Collapse one tree level per pass; len(level) stays a power of two,
        # so every index i has a partner at i + 1.
        while len(level) > 1:
            level = [self._hash_raw(level[i] + level[i + 1])
                     for i in range(0, len(level), 2)]
        return level[0]

    def load(self, fp):
        """Append the digest of every ``blocksize`` chunk read from *fp*.

        Resets ``self.size`` and then accumulates the number of bytes read.
        Digests are appended; entries already in the list are not cleared.
        Progress is reported through an ``IncrementalBar``.

        :param fp: open file object; must support ``fileno()`` and ``read()``.
        """
        self.size = 0
        file_size = os.fstat(fp.fileno()).st_size
        # Ceiling division; evaluates to 0 for an empty file.
        nblocks = 1 + (file_size - 1) // self.blocksize
        bar = IncrementalBar('Computing', max=nblocks)
        bar.suffix = '%(percent).1f%% - %(eta)ds'
        for block in bar.iter(file_read_iterator(fp, self.blocksize)):
            self.append(self._hash_block(block))
            self.size += len(block)
87

    
88

    
89
def merkle(path, blocksize=4194304, blockhash='sha256'):
    """Return the hex-encoded combined block hash of the file at *path*.

    :param path: filesystem path of the file to hash.
    :param blocksize: bytes per block (default 4194304, i.e. 4 MiB).
    :param blockhash: hash algorithm name (default ``'sha256'``).
    :returns: ``hexlify`` of the ``HashMap.hash()`` digest.
    """
    hashes = HashMap(blocksize, blockhash)
    # Binary mode so the raw bytes are hashed without newline translation or
    # decoding; the ``with`` block closes the handle even if loading fails.
    with open(path, 'rb') as fp:
        hashes.load(fp)
    return hexlify(hashes.hash())