Revision a156c8b3
b/pithos/backends/simple.py

@@ -35,12 +35,11 @@
 import time
 import sqlite3
 import logging
-import types
 import hashlib
-import shutil
-import pickle
+import binascii
 
 from base import NotAllowedError, BaseBackend
+from pithos.lib.hashfiler import Mapper, Blocker
 
 
 logger = logging.getLogger(__name__)
@@ -52,19 +51,19 @@
     Uses SQLite for storage.
     """
 
-    # TODO: Automatic/manual clean-up after a time interval.
-
     def __init__(self, db):
-        self.hash_algorithm = 'sha1'
-        self.block_size = 128 * 1024 # 128KB
+        self.hash_algorithm = 'sha256'
+        self.block_size = 4 * 1024 * 1024 # 4MB
 
         self.default_policy = {'quota': 0, 'versioning': 'auto'}
 
         basepath = os.path.split(db)[0]
         if basepath and not os.path.exists(basepath):
             os.makedirs(basepath)
+        if not os.path.isdir(basepath):
+            raise RuntimeError("Cannot open database at '%s'" % (db,))
 
-        self.con = sqlite3.connect(db, check_same_thread=False)
+        self.con = sqlite3.connect(basepath + '/db', check_same_thread=False)
 
         sql = '''pragma foreign_keys = on'''
         self.con.execute(sql)
@@ -85,17 +84,6 @@
                     foreign key (version_id) references versions(version_id)
                     on delete cascade)'''
         self.con.execute(sql)
-        sql = '''create table if not exists hashmaps (
-                    version_id integer,
-                    pos integer,
-                    block_id text,
-                    primary key (version_id, pos)
-                    foreign key (version_id) references versions(version_id)
-                    on delete cascade)'''
-        self.con.execute(sql)
-        sql = '''create table if not exists blocks (
-                    block_id text, data blob, primary key (block_id))'''
-        self.con.execute(sql)
 
         sql = '''create table if not exists policy (
                     name text, key text, value text, primary key (name, key))'''
@@ -111,6 +99,15 @@
                     name text, primary key (name))'''
         self.con.execute(sql)
         self.con.commit()
+
+        params = {'blocksize': self.block_size,
+                  'blockpath': basepath + '/blocks',
+                  'hashtype': self.hash_algorithm}
+        self.blocker = Blocker(**params)
+
+        params = {'mappath': basepath + '/maps',
+                  'namelen': self.blocker.hashlen}
+        self.mapper = Mapper(**params)
 
     def get_account_meta(self, user, account, until=None):
         """Return a dictionary with the account metadata."""
@@ -154,7 +151,7 @@
         logger.debug("update_account_meta: %s %s %s", account, meta, replace)
         if user != account:
             raise NotAllowedError
-        self._put_metadata(user, account, meta, replace)
+        self._put_metadata(user, account, meta, replace, False)
 
     def get_account_groups(self, user, account):
         """Return a dictionary with the user groups defined for this account."""
@@ -254,7 +251,7 @@
         if user != account:
             raise NotAllowedError
         path, version_id, mtime = self._get_containerinfo(account, container)
-        self._put_metadata(user, path, meta, replace)
+        self._put_metadata(user, path, meta, replace, False)
 
     def get_container_policy(self, user, account, container):
         """Return a dictionary with the container policy."""
@@ -329,7 +326,7 @@
         self.con.execute(sql, (path, path + '/%',))
         sql = 'delete from policy where name = ?'
         self.con.execute(sql, (path,))
-        self._copy_version(user, account, account, True, True) # New account version (for timestamp update).
+        self._copy_version(user, account, account, True, False) # New account version (for timestamp update).
 
     def list_objects(self, user, account, container, prefix='', delimiter=None, marker=None, limit=10000, virtual=True, keys=[], until=None):
         """Return a list of objects existing under a container."""
@@ -421,10 +418,8 @@
         logger.debug("get_object_hashmap: %s %s %s %s", account, container, name, version)
         self._can_read(user, account, container, name)
         path, version_id, muser, mtime, size = self._get_objectinfo(account, container, name, version)
-        sql = 'select block_id from hashmaps where version_id = ? order by pos asc'
-        c = self.con.execute(sql, (version_id,))
-        hashmap = [x[0] for x in c.fetchall()]
-        return size, hashmap
+        hashmap = self.mapper.map_retr(version_id)
+        return size, [binascii.hexlify(x) for x in hashmap]
 
     def update_object_hashmap(self, user, account, container, name, size, hashmap, meta={}, replace_meta=False, permissions=None):
         """Create/update an object with the specified size and partial hashes."""
@@ -433,12 +428,7 @@
         if permissions is not None and user != account:
             raise NotAllowedError
         self._can_write(user, account, container, name)
-        missing = []
-        for i in range(len(hashmap)):
-            sql = 'select count(*) from blocks where block_id = ?'
-            c = self.con.execute(sql, (hashmap[i],))
-            if c.fetchone()[0] == 0:
-                missing.append(hashmap[i])
+        missing = self.blocker.block_ping([binascii.unhexlify(x) for x in hashmap])
         if missing:
             ie = IndexError()
             ie.data = missing
@@ -450,10 +440,7 @@
         src_version_id, dest_version_id = self._copy_version(user, path, path, not replace_meta, False)
         sql = 'update versions set size = ? where version_id = ?'
         self.con.execute(sql, (size, dest_version_id))
-        # TODO: Check for block_id existence.
-        for i in range(len(hashmap)):
-            sql = 'insert or replace into hashmaps (version_id, pos, block_id) values (?, ?, ?)'
-            self.con.execute(sql, (dest_version_id, i, hashmap[i]))
+        self.mapper.map_stor(dest_version_id, [binascii.unhexlify(x) for x in hashmap])
         for k, v in meta.iteritems():
             sql = 'insert or replace into metadata (version_id, key, value) values (?, ?, ?)'
             self.con.execute(sql, (dest_version_id, k, v))
@@ -536,24 +523,17 @@
         """Return a block's data."""
 
         logger.debug("get_block: %s", hash)
-        c = self.con.execute('select data from blocks where block_id = ?', (hash,))
-        row = c.fetchone()
-        if row:
-            return str(row[0])
-        else:
+        blocks = self.blocker.block_retr((binascii.unhexlify(hash),))
+        if not blocks:
             raise NameError('Block does not exist')
+        return blocks[0]
 
     def put_block(self, data):
         """Create a block and return the hash."""
 
         logger.debug("put_block: %s", len(data))
-        h = hashlib.new(self.hash_algorithm)
-        h.update(data.rstrip('\x00'))
-        hash = h.hexdigest()
-        sql = 'insert or ignore into blocks (block_id, data) values (?, ?)'
-        self.con.execute(sql, (hash, buffer(data)))
-        self.con.commit()
-        return hash
+        hashes, absent = self.blocker.block_stor((data,))
+        return binascii.hexlify(hashes[0])
 
     def update_block(self, hash, data, offset=0):
         """Update a known block and return the hash."""
@@ -561,12 +541,8 @@
         logger.debug("update_block: %s %s %s", hash, len(data), offset)
         if offset == 0 and len(data) == self.block_size:
             return self.put_block(data)
-        src_data = self.get_block(hash)
-        bs = self.block_size
-        if offset < 0 or offset > bs or offset + len(data) > bs:
-            raise IndexError('Offset or data outside block limits')
-        dest_data = src_data[:offset] + data + src_data[offset + len(data):]
-        return self.put_block(dest_data)
+        h, e = self.blocker.block_delta(binascii.unhexlify(hash), ((offset, data),))
+        return binascii.hexlify(h)
 
     def _sql_until(self, until=None):
         """Return the sql to get the latest versions until the timestamp given."""
@@ -631,9 +607,9 @@
         sql = sql % dest_version_id
         self.con.execute(sql, (src_version_id,))
         if copy_data and src_version_id is not None:
-            sql = 'insert into hashmaps select %s, pos, block_id from hashmaps where version_id = ?'
-            sql = sql % dest_version_id
-            self.con.execute(sql, (src_version_id,))
+            # TODO: Copy properly.
+            hashmap = self.mapper.map_retr(src_version_id)
+            self.mapper.map_stor(dest_version_id, hashmap)
         self.con.commit()
         return src_version_id, dest_version_id
 
@@ -678,10 +654,10 @@
         c = self.con.execute(sql, (version,))
         return dict(c.fetchall())
 
-    def _put_metadata(self, user, path, meta, replace=False):
+    def _put_metadata(self, user, path, meta, replace=False, copy_data=True):
         """Create a new version and store metadata."""
 
-        src_version_id, dest_version_id = self._copy_version(user, path, path, not replace, True)
+        src_version_id, dest_version_id = self._copy_version(user, path, path, not replace, copy_data)
         for k, v in meta.iteritems():
             if not replace and v == '':
                 sql = 'delete from metadata where version_id = ? and key = ?'
@@ -865,7 +841,6 @@
         return objects[start:start + limit]
 
     def _del_version(self, version):
-        sql = 'delete from hashmaps where version_id = ?'
-        self.con.execute(sql, (version,))
+        self.mapper.map_remv(version)
         sql = 'delete from versions where version_id = ?'
         self.con.execute(sql, (version,))
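An end-to-end sketch of the new object write path (hypothetical backend instance, account, and object names; note that block_ping, shown in blocker.py below, reports missing blocks by list index, so ie.data now carries positions rather than hashes):

    block = 'some object data'
    h = b.put_block(block)
    try:
        b.update_object_hashmap('user', 'user', 'pithos', 'obj', len(block), [h])
    except IndexError, ie:
        print ie.data                      # positions of missing blocks
    size, hashmap = b.get_object_hashmap('user', 'user', 'pithos', 'obj')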
b/pithos/lib/hashfiler/__init__.py (new file)

@@ -0,0 +1,36 @@
+# Copyright 2011 GRNET S.A. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or
+# without modification, are permitted provided that the following
+# conditions are met:
+#
+#   1. Redistributions of source code must retain the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer.
+#
+#   2. Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# The views and conclusions contained in the software and
+# documentation are those of the authors and should not be
+# interpreted as representing official policies, either expressed
+# or implied, of GRNET S.A.
+
+from blocker import Blocker
+from mapper import Mapper
+
b/pithos/lib/hashfiler/blocker.py (new file)

@@ -0,0 +1,205 @@
+# [Lines 1-33: GRNET S.A. license header, identical to __init__.py above.]
+
+from os import makedirs
+from os.path import isdir, realpath, exists, join
+from hashlib import new as newhasher
+from binascii import hexlify
+
+from pithos.lib.hashfiler.context_file import ContextFile, file_sync_read_chunks
+
+
+class Blocker(object):
+    """Blocker.
+       Required constructor parameters: blocksize, blockpath, hashtype.
+    """
+
+    blocksize = None
+    blockpath = None
+    hashtype = None
+
+    def __init__(self, **params):
+        blocksize = params['blocksize']
+        blockpath = params['blockpath']
+        blockpath = realpath(blockpath)
+        if not isdir(blockpath):
+            if not exists(blockpath):
+                makedirs(blockpath)
+            else:
+                raise ValueError("Variable blockpath '%s' is not a directory" % (blockpath,))
+
+        hashtype = params['hashtype']
+        try:
+            hasher = newhasher(hashtype)
+        except ValueError:
+            msg = "Variable hashtype '%s' is not available from hashlib"
+            raise ValueError(msg % (hashtype,))
+
+        hasher.update("")
+        emptyhash = hasher.digest()
+
+        self.blocksize = blocksize
+        self.blockpath = blockpath
+        self.hashtype = hashtype
+        self.hashlen = len(emptyhash)
+        self.emptyhash = emptyhash
+
+    def get_rear_block(self, blkhash, create=0):
+        name = join(self.blockpath, hexlify(blkhash))
+        return ContextFile(name, create)
+
+    def check_rear_block(self, blkhash):
+        name = join(self.blockpath, hexlify(blkhash))
+        return exists(name)
+
+    def block_hash(self, data):
+        """Hash a block of data"""
+        hasher = newhasher(self.hashtype)
+        hasher.update(data.rstrip('\x00'))
+        return hasher.digest()
+
+    def block_ping(self, hashes):
+        """Check hashes for existence and
+           return those missing from block storage.
+        """
+        missing = []
+        append = missing.append
+        for i, h in enumerate(hashes):
+            if not self.check_rear_block(h):
+                append(i)
+        return missing
+
+    def block_retr(self, hashes):
+        """Retrieve blocks from storage by their hashes."""
+        blocksize = self.blocksize
+        blocks = []
+        append = blocks.append
+        block = None
+
+        for h in hashes:
+            with self.get_rear_block(h, 0) as rbl:
+                if not rbl:
+                    break
+                for block in rbl.sync_read_chunks(blocksize, 1, 0):
+                    break # there should be just one block there
+            if not block:
+                break
+            append(block)
+
+        return blocks
+
+    def block_stor(self, blocklist):
+        """Store a bunch of blocks and return (hashes, missing).
+           Hashes is a list of the hashes of the blocks,
+           missing is a list of indices in that list indicating
+           which blocks were missing from the store.
+        """
+        block_hash = self.block_hash
+        hashlist = [block_hash(b) for b in blocklist]
+        mf = None
+        missing = self.block_ping(hashlist)
+        for i in missing:
+            with self.get_rear_block(hashlist[i], 1) as rbl:
+                rbl.sync_write(blocklist[i]) #XXX: verify?
+
+        return hashlist, missing
+
+    def block_delta(self, blkhash, offdata=()):
+        """Construct and store a new block from a given block
+           and a list of (offset, data) 'patches'. Return:
+           (the hash of the new block, if the block already existed)
+        """
+        if not offdata:
+            return None, None
+
+        blocksize = self.blocksize
+        block = self.block_retr((blkhash,))
+        if not block:
+            return None, None
+
+        block = block[0]
+        newblock = ''
+        idx = 0
+        size = 0
+        trunc = 0
+        for off, data in offdata:
+            if not data:
+                trunc = 1
+                break
+            newblock += block[idx:off] + data
+            size += off - idx + len(data)
+            if size >= blocksize:
+                break
+            off = size
+
+        if not trunc:
+            newblock += block[size:len(block)]
+
+        h, a = self.block_stor((newblock,))
+        return h[0], 1 if a else 0
+
+    def block_hash_file(self, openfile):
+        """Return the list of hashes (hashes map)
+           for the blocks in a buffered file.
+           Helper method, does not affect store.
+        """
+        hashes = []
+        append = hashes.append
+        block_hash = self.block_hash
+
+        for block in file_sync_read_chunks(openfile, self.blocksize, 1, 0):
+            append(block_hash(block))
+
+        return hashes
+
+    def block_stor_file(self, openfile):
+        """Read blocks from buffered file object and store them. Return:
+           (bytes read, list of hashes, list of hashes that were missing)
+        """
+        blocksize = self.blocksize
+        block_stor = self.block_stor
+        hashlist = []
+        hextend = hashlist.extend
+        storedlist = []
+        sextend = storedlist.extend
+        lastsize = 0
+
+        for block in file_sync_read_chunks(openfile, blocksize, 1, 0):
+            hl, sl = block_stor((block,))
+            hextend(hl)
+            sextend(sl)
+            lastsize = len(block)
+
+        size = (len(hashlist) - 1) * blocksize + lastsize if hashlist else 0
+        return size, hashlist, storedlist
+
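A minimal standalone sketch of the Blocker store, under the same parameters the backend constructor above passes (the path is illustrative):

    from pithos.lib.hashfiler import Blocker

    blocker = Blocker(blocksize=4 * 1024 * 1024,
                      blockpath='data/pithos/blocks',
                      hashtype='sha256')
    hashes, missing = blocker.block_stor(('hello blocks',))
    print blocker.block_retr((hashes[0],))[0]   # -> 'hello blocks'
    print blocker.block_ping(hashes)            # -> [] (all present now)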
b/pithos/lib/hashfiler/context_file.py (new file)

@@ -0,0 +1,191 @@
+# [Lines 1-33: GRNET S.A. license header, identical to __init__.py above.]
+
+from os import SEEK_CUR, SEEK_SET, fsync
+from errno import ENOENT
+
+
+_zeros = ''
+
+
+def zeros(nr):
+    global _zeros
+    size = len(_zeros)
+    if nr == size:
+        return _zeros
+
+    if nr > size:
+        _zeros += '\0' * (nr - size)
+        return _zeros
+
+    if nr < size:
+        _zeros = _zeros[:nr]
+        return _zeros
+
+
+def file_sync_write_chunks(openfile, chunksize, offset, chunks, size=None):
+    """Write given chunks to the given buffered file object.
+       Writes never span across chunk boundaries.
+       If size is given stop after or pad until size bytes have been written.
+    """
+    fwrite = openfile.write
+    seek = openfile.seek
+    padding = 0
+
+    try:
+        seek(offset * chunksize)
+    except IOError, e:
+        seek = None
+        for x in xrange(offset):
+            fwrite(zeros(chunksize))
+
+    cursize = offset * chunksize
+
+    for chunk in chunks:
+        if padding:
+            if seek:
+                seek(padding - 1, SEEK_CUR)
+                fwrite("\x00")
+            else:
+                fwrite(buffer(zeros(chunksize), 0, padding))
+        if size is not None and cursize + chunksize >= size:
+            chunk = chunk[:chunksize - (cursize - size)]
+            fwrite(chunk)
+            cursize += len(chunk)
+            break
+        fwrite(chunk)
+        padding = chunksize - len(chunk)
+
+    padding = size - cursize if size is not None else 0
+    if padding <= 0:
+        return
+
+    q, r = divmod(padding, chunksize)
+    for x in xrange(q):
+        fwrite(zeros(chunksize))
+    fwrite(buffer(zeros(chunksize), 0, r))
+
+
+def file_sync_read_chunks(openfile, chunksize, nr, offset=0):
+    """Read and yield groups of chunks from a buffered file object at offset.
+       Reads never span across chunksize boundaries.
+    """
+    fread = openfile.read
+    remains = offset * chunksize
+    seek = openfile.seek
+    try:
+        seek(remains)
+    except IOError, e:
+        seek = None
+        while 1:
+            s = fread(remains)
+            remains -= len(s)
+            if remains <= 0:
+                break
+
+    while nr:
+        remains = chunksize
+        chunk = ''
+        while 1:
+            s = fread(remains)
+            if not s:
+                if chunk:
+                    yield chunk
+                return
+            chunk += s
+            remains -= len(s)
+            if remains <= 0:
+                break
+        yield chunk
+        nr -= 1
+
+
+class ContextFile(object):
+    __slots__ = ("name", "fdesc", "create")
+
+    def __init__(self, name, create=0):
+        self.name = name
+        self.fdesc = None
+        self.create = create
+        #self.dirty = 0
+
+    def __enter__(self):
+        name = self.name
+        try:
+            fdesc = open(name, 'rb+')
+        except IOError, e:
+            if not self.create or e.errno != ENOENT:
+                raise
+            fdesc = open(name, 'w+')
+
+        self.fdesc = fdesc
+        return self
+
+    def __exit__(self, exc, arg, trace):
+        fdesc = self.fdesc
+        if fdesc is not None:
+            #if self.dirty:
+            #    fsync(fdesc.fileno())
+            fdesc.close()
+        return False # propagate exceptions
+
+    def seek(self, offset, whence=SEEK_SET):
+        return self.fdesc.seek(offset, whence)
+
+    def tell(self):
+        return self.fdesc.tell()
+
+    def truncate(self, size):
+        self.fdesc.truncate(size)
+
+    def sync_write(self, data):
+        #self.dirty = 1
+        self.fdesc.write(data)
+
+    def sync_write_chunks(self, chunksize, offset, chunks, size=None):
+        #self.dirty = 1
+        return file_sync_write_chunks(self.fdesc, chunksize, offset, chunks, size)
+
+    def sync_read(self, size):
+        read = self.fdesc.read
+        data = ''
+        while 1:
+            s = read(size)
+            if not s:
+                break
+            data += s
+        return data
+
+    def sync_read_chunks(self, chunksize, nr, offset=0):
+        return file_sync_read_chunks(self.fdesc, chunksize, nr, offset)
+
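A hypothetical round trip through ContextFile, showing the chunked read/write helpers (the temporary filename is illustrative):

    from pithos.lib.hashfiler.context_file import ContextFile

    with ContextFile('/tmp/cf-demo', create=1) as cf:
        cf.sync_write_chunks(4, 0, ('abcd', 'ef'))   # chunksize 4, offset 0
    with ContextFile('/tmp/cf-demo') as cf:
        print list(cf.sync_read_chunks(4, 2, 0))     # -> ['abcd', 'ef']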
b/pithos/lib/hashfiler/mapper.py (new file)

@@ -0,0 +1,102 @@
+# [Lines 1-33: GRNET S.A. license header, identical to __init__.py above.]
+
+from os.path import realpath, join, exists, isdir
+from os import makedirs, unlink
+from errno import ENOENT
+
+from pithos.lib.hashfiler.context_file import ContextFile
+
+
+class Mapper(object):
+    """Mapper.
+       Required constructor parameters: mappath, namelen.
+    """
+
+    mappath = None
+    namelen = None
+
+    def __init__(self, **params):
+        self.params = params
+        self.namelen = params['namelen']
+        mappath = realpath(params['mappath'])
+        if not isdir(mappath):
+            if not exists(mappath):
+                makedirs(mappath)
+            else:
+                raise ValueError("Variable mappath '%s' is not a directory" % (mappath,))
+        self.mappath = mappath
+
+    def get_rear_map(self, name, create=0):
+        name = join(self.mappath, hex(int(name)))
+        return ContextFile(name, create)
+
+    def delete_rear_map(self, name):
+        name = join(self.mappath, hex(int(name)))
+        try:
+            unlink(name)
+            return 1
+        except OSError, e:
+            if e.errno != ENOENT:
+                raise
+            return 0
+
+    def map_retr(self, name, blkoff=0, nr=100000000000000):
+        """Return as a list, part of the hashes map of an object
+           at the given block offset.
+           By default, return the whole hashes map.
+        """
+        namelen = self.namelen
+        hashes = ()
+
+        with self.get_rear_map(name, 0) as rmap:
+            if rmap:
+                hashes = list(rmap.sync_read_chunks(namelen, nr, blkoff))
+        return hashes
+
+    def map_stor(self, name, hashes=(), blkoff=0, create=1):
+        """Store hashes in the given hashes map, replacing the old ones."""
+        namelen = self.namelen
+        with self.get_rear_map(name, 1) as rmap:
+            rmap.sync_write_chunks(namelen, blkoff, hashes, None)
+
+    # def map_copy(self, src, dst):
+    #     """Copy a hashes map to another one, replacing it."""
+    #     with self.get_rear_map(src, 0) as rmap:
+    #         if rmap:
+    #             rmap.copy_to(dst)
+
+    def map_remv(self, name):
+        """Remove a hashes map. Returns true if the map was found and removed."""
+        return self.delete_rear_map(name)
+
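And a hypothetical Mapper round trip; namelen is the digest length, which for sha256 is the 32 the backend derives from Blocker.hashlen (the path and version id are illustrative):

    from hashlib import sha256
    from pithos.lib.hashfiler import Mapper

    mapper = Mapper(mappath='data/pithos/maps', namelen=32)
    hashes = [sha256(str(i)).digest() for i in range(3)]
    mapper.map_stor(42, hashes)              # 42 stands in for a version_id
    print mapper.map_retr(42) == hashes      # -> True
    mapper.map_remv(42)                      # delete the map file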
b/pithos/settings.py.dist

@@ -66,9 +66,9 @@
 
 # The backend to use and its initilization options.
 if TEST:
-    BACKEND = ('SimpleBackend', (os.path.join(PROJECT_PATH, 'data/testpithos.db'),))
+    BACKEND = ('SimpleBackend', (os.path.join(PROJECT_PATH, 'data/test/'),))
 else:
-    BACKEND = ('SimpleBackend', (os.path.join(PROJECT_PATH, 'data/pithos.db'),))
+    BACKEND = ('SimpleBackend', (os.path.join(PROJECT_PATH, 'data/pithos/'),))
 
 # Local time zone for this installation. Choices can be found here:
 # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name