Statistics
| Branch: | Tag: | Revision:

root / contrib / snf-pithos-tools / pithos / tools / lib / hashmap.py @ 3a19e99b

History | View | Annotate | Download (3.1 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or
4
# without modification, are permitted provided that the following
5
# conditions are met:
6
#
7
#   1. Redistributions of source code must retain the above
8
#      copyright notice, this list of conditions and the following
9
#      disclaimer.
10
#
11
#   2. Redistributions in binary form must reproduce the above
12
#      copyright notice, this list of conditions and the following
13
#      disclaimer in the documentation and/or other materials
14
#      provided with the distribution.
15
#
16
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
# POSSIBILITY OF SUCH DAMAGE.
28
#
29
# The views and conclusions contained in the software and
30
# documentation are those of the authors and should not be
31
# interpreted as representing official policies, either expressed
32
# or implied, of GRNET S.A.
33

    
34
import hashlib
35
import os
36

    
37
from binascii import hexlify
38

    
39
from progress.bar import IncrementalBar
40

    
41

    
42
def file_read_iterator(fp, size=1024):
    """Yield successive chunks read from *fp* until it is exhausted.

    Each chunk is whatever ``fp.read(size)`` returns; iteration stops at
    the first empty (falsy) read, so the final chunk may be shorter than
    *size*.

    :param fp: object exposing a ``read(size)`` method.
    :param size: number of bytes/characters requested per read.
    """
    chunk = fp.read(size)
    while chunk:
        yield chunk
        chunk = fp.read(size)
48

    
49

    
50
class HashMap(list):
    """List of per-block digests that can be reduced to one top-level hash.

    Each element is the raw digest of one block of data. ``load`` fills the
    list from a file object and ``hash`` folds the digests pairwise into a
    single digest.
    """

    def __init__(self, blocksize, blockhash):
        """Create an empty map.

        :param blocksize: number of bytes read per block by ``load``.
        :param blockhash: algorithm name passed to ``hashlib.new``.
        """
        super(HashMap, self).__init__()
        self.blocksize = blocksize
        self.blockhash = blockhash
        # Total bytes consumed by load(); defined here so the attribute
        # exists even before load() has been called.
        self.size = 0

    def _hash_raw(self, v):
        """Return the raw digest of the byte string *v*."""
        h = hashlib.new(self.blockhash)
        h.update(v)
        return h.digest()

    def _hash_block(self, v):
        """Return the digest of block *v* with trailing NUL bytes removed."""
        # Bytes literal: identical to the old str literal on Python 2 and
        # required on Python 3, where blocks read in binary mode are bytes.
        return self._hash_raw(v.rstrip(b'\x00'))

    def hash(self):
        """Fold the stored block digests into a single digest.

        * no blocks: the digest of the empty byte string;
        * one block: that block's digest, unchanged;
        * otherwise: the list is padded with all-zero entries (same length
          as a digest) up to the next power of two, then adjacent pairs are
          concatenated and hashed repeatedly until one digest remains.
        """
        if len(self) == 0:
            return self._hash_raw(b'')
        if len(self) == 1:
            return self[0]

        level = list(self)
        # Smallest power of two that is >= the number of digests.
        width = 2
        while width < len(level):
            width *= 2
        zero_digest = b'\x00' * len(level[0])
        level += [zero_digest] * (width - len(level))
        # Each pass halves the list by hashing adjacent pairs.
        while len(level) > 1:
            level = [self._hash_raw(level[i] + level[i + 1])
                     for i in range(0, len(level), 2)]
        return level[0]

    def load(self, fp):
        """Append the digest of every ``blocksize`` chunk read from *fp*.

        Resets ``self.size`` and accumulates the number of bytes read. A
        progress bar sized from the file's on-disk length is advanced once
        per block.

        :param fp: open file object; must support ``fileno()`` and
            ``read(size)``. Digests are appended to whatever the list
            already contains.
        """
        self.size = 0
        file_size = os.fstat(fp.fileno()).st_size
        # Ceiling division; yields 0 for an empty file.
        nblocks = 1 + (file_size - 1) // self.blocksize
        bar = IncrementalBar('Computing', max=nblocks)
        bar.suffix = '%(percent).1f%% - %(eta)ds'
        for block in bar.iter(file_read_iterator(fp, self.blocksize)):
            self.append(self._hash_block(block))
            self.size += len(block)
89

    
90

    
91
def merkle(fp, blocksize=4194304, blockhash='sha256'):
    """Return the hex-encoded top hash of the contents of *fp*.

    The file is read in *blocksize* chunks, each chunk is hashed with
    *blockhash*, and the per-block digests are folded into one digest by
    ``HashMap.hash``.

    :param fp: open file object; must support ``fileno()`` and ``read``.
    :param blocksize: bytes per block (default 4194304, i.e. 4 MiB).
    :param blockhash: algorithm name understood by ``hashlib.new``.
    """
    block_hashes = HashMap(blocksize, blockhash)
    block_hashes.load(fp)
    top_digest = block_hashes.hash()
    return hexlify(top_digest)