Revision 2db16f05 tools/migrate_db
b/tools/migrate_db | ||
---|---|---|
39 | 39 |
from binascii import hexlify |
40 | 40 |
|
41 | 41 |
from pithos.backends.lib.hashfiler import Blocker |
42 |
from pithos.backends.lib.sqlalchemy import Node |
|
42 | 43 |
from pithos.aai.models import PithosUser |
43 | 44 |
|
44 | 45 |
from django.conf import settings |
45 | 46 |
|
46 |
from pithos.backends.modular import ModularBackend |
|
47 |
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED |
|
48 |
from pithos.backends.lib.sqlalchemy.node import Node |
|
49 |
from pithos.backends.lib.sqlalchemy.dbwrapper import DBWrapper |
|
47 | 50 |
|
48 | 51 |
from lib.transfer import upload |
49 |
from lib.hashmap import HashMap |
|
52 |
from lib.hashmap import HashMap, file_read_iterator
|
|
50 | 53 |
from lib.client import Fault |
51 | 54 |
from lib.migrate import Migration |
52 | 55 |
|
... | ... | |
54 | 57 |
import os |
55 | 58 |
import sys |
56 | 59 |
import hashlib |
60 |
import mimetypes |
|
57 | 61 |
|
class ObjectMigration(Migration):
    """Migrates GSS file objects and their version history into the
    Pithos backend (replaces the earlier DataMigration-based class)."""

    def __init__(self, old_db):
        # The Migration base wires up the connection to the legacy database.
        Migration.__init__(self, old_db)
        # Client-like facade over the Pithos backend, as expected by
        # lib.transfer.upload().
        self.wrapper = ClientWrapper(self.backend)
        # Direct node-layer access for version bookkeeping
        # (version_create / version_get_properties / version_remove).
        self.node = Node(wrapper=DBWrapper(self.backend.db))
63 | 69 |
def create_default_containers(self): |
64 | 70 |
users = PithosUser.objects.all() |
65 | 71 |
for u in users: |
... | ... | |
81 | 87 |
else: |
82 | 88 |
return '%s/%s' %(self.get_path(parent_id), foldername) |
83 | 89 |
|
90 |
def create_object(self, username, container, object, filepath, mimetype): |
|
91 |
obj = '' |
|
92 |
path = '/'.join(object.split('/')[:-1]) |
|
93 |
name = object.split('/')[-1] |
|
94 |
#create directory markers |
|
95 |
for f in path.split('/'): |
|
96 |
obj = '%s/%s' %(obj, f) if obj else f |
|
97 |
try: |
|
98 |
self.wrapper.create_directory_marker('pithos', obj, username) |
|
99 |
except NameError, e: |
|
100 |
pass |
|
101 |
self.wrapper.set_account(username) |
|
102 |
|
|
103 |
prefix = '%s/' %path if path else '' |
|
104 |
print '#', filepath, container, prefix, name, mimetype |
|
105 |
return upload(self.wrapper, filepath, container, prefix, name, mimetype) |
|
106 |
|
|
107 |
def create_history(self, user, header_id, node_id, deleted=False): |
|
108 |
filebody = Table('filebody', self.metadata, autoload=True) |
|
109 |
gss_user = Table('gss_user', self.metadata, autoload=True) |
|
110 |
j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id) |
|
111 |
s = select([filebody.c.filesize, gss_user.c.username], from_obj=j) |
|
112 |
s = s.where(filebody.c.header_id == header_id) |
|
113 |
s = s.order_by(filebody.c.version) |
|
114 |
rp = self.conn.execute(s) |
|
115 |
versions = rp.fetchall() |
|
116 |
print '#', len(versions) |
|
117 |
rp.close() |
|
118 |
i = 0 |
|
119 |
for size, modyfied_by in versions: |
|
120 |
cluster = CLUSTER_HISTORY if i < len(versions) - 1 else CLUSTER_NORMAL |
|
121 |
cluster = cluster if not deleted else CLUSTER_DELETED |
|
122 |
args = (node_id, size, None, modyfied_by, cluster) |
|
123 |
self.node.version_create(*args) |
|
124 |
i += 1 |
|
125 |
|
|
84 | 126 |
def create_objects(self): |
85 | 127 |
fileheader = Table('fileheader', self.metadata, autoload=True) |
86 | 128 |
filebody = Table('filebody', self.metadata, autoload=True) |
87 | 129 |
folder = Table('folder', self.metadata, autoload=True) |
88 | 130 |
gss_user = Table('gss_user', self.metadata, autoload=True) |
89 |
j = filebody.join(fileheader, filebody.c.header_id == fileheader.c.id)
|
|
131 |
j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
|
|
90 | 132 |
j = j.join(folder, fileheader.c.folder_id == folder.c.id) |
91 | 133 |
j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id) |
92 |
s = select([gss_user.c.username, fileheader.c.folder_id, fileheader.c.name, |
|
93 |
filebody.c.storedfilepath], from_obj=j) |
|
134 |
s = select([gss_user.c.username, fileheader.c.id, fileheader.c.folder_id, |
|
135 |
fileheader.c.name, fileheader.c.deleted, filebody.c.storedfilepath, |
|
136 |
filebody.c.mimetype], from_obj=j) |
|
94 | 137 |
rp = self.conn.execute(s) |
95 | 138 |
objects = rp.fetchall() |
96 |
for username, folderid, filename, filepath in objects:
|
|
139 |
for username, headerid, folderid, filename, deleted, filepath, mimetype in objects:
|
|
97 | 140 |
path = self.get_path(folderid)[1:] |
98 |
obj = '' |
|
99 |
#create directory markers |
|
100 |
for f in path.split('/'): |
|
101 |
obj = '%s/%s' %(obj, f) if obj else f |
|
102 |
try: |
|
103 |
self.wrapper.create_directory_marker('pithos', obj, username) |
|
104 |
except NameError, e: |
|
105 |
pass |
|
106 |
self.wrapper.set_account(username) |
|
107 |
|
|
108 |
print '#', username, path, filename |
|
109 |
prefix = '%s/' %path if path else '' |
|
110 |
upload(self.wrapper, filepath, 'pithos', prefix, filename) |
|
141 |
container = 'pithos' if not deleted else 'trash' |
|
142 |
object = '%s/%s' %(path, filename) |
|
143 |
#filepath = '/Users/butters/Downloads/torvalds-linux-0f86267' |
|
144 |
vserial = self.create_object(username, container, object, filepath, mimetype) |
|
145 |
nodeid = self.node.version_get_properties(vserial, keys=('node',))[0] |
|
146 |
self.create_history(username, headerid, nodeid, deleted) |
|
147 |
self.node.version_remove(vserial) |
|
148 |
#self.set_metadata() |
|
149 |
#self.set_public() |
|
150 |
#self.statistics() |
|
151 |
#self.set_permissions() |
|
152 |
|
|
153 |
def handle_deleted(self): |
|
154 |
pass |
|
155 |
|
|
156 |
def upload_dir(self, dir, prefix, user, container): |
|
157 |
for f in os.listdir(dir): |
|
158 |
fullpath = '%s/%s' %(dir, f) |
|
159 |
if os.path.isfile(fullpath): |
|
160 |
type = mimetypes.guess_type(fullpath)[0] |
|
161 |
name = '/'.join(fullpath.split(prefix)[1:]) |
|
162 |
print '@', user, container, name, fullpath, type |
|
163 |
self.create_object(user, container, name, fullpath, type) |
|
164 |
else: self.upload_dir(fullpath, prefix, user, container) |
|
111 | 165 |
|
class ClientWrapper(object):
    """Wraps client methods used by transfer.upload()
    to ModularBackend methods"""

    def __init__(self, backend):
        # Adapt an already-constructed backend (instead of building one
        # from Django settings, as an earlier revision did).
        self.backend = backend
        self.block_size = self.backend.block_size
        self.block_hash = self.backend.hash_algorithm
... | ... | |
131 | 184 |
'hash': md5.hexdigest().lower()} |
132 | 185 |
self.backend.update_object_hashmap(account, account, container, object, 0, [], meta) |
133 | 186 |
|
134 |
def create_object_by_hashmap(self, container, object, map): |
|
187 |
def create_object_by_hashmap(self, container, object, map, mimetype=None):
|
|
135 | 188 |
hashmap = HashMap(self.block_size, self.block_hash) |
136 |
for hash in map['hashes']:
|
|
137 |
hashmap.append(hash)
|
|
189 |
for h in map['hashes']: |
|
190 |
hashmap.append(h) |
|
138 | 191 |
meta = {'hash':hexlify(hashmap.hash())} |
192 |
if mimetype: |
|
193 |
meta['content-type'] = mimetype |
|
139 | 194 |
size = map['bytes'] |
140 | 195 |
try: |
141 | 196 |
args = [self.account, self.account, container, object, size, map['hashes'], meta] |
142 |
self.backend.update_object_hashmap(*args) |
|
197 |
return self.backend.update_object_hashmap(*args)
|
|
143 | 198 |
except IndexError, ie: |
144 | 199 |
fault = Fault(ie.data, 409) |
145 | 200 |
raise fault |
146 | 201 |
|
147 |
def create_object(self, container, object, f): |
|
148 |
hashmap = HashMap(self.block_size, self.block_hash) |
|
149 |
hashmap.load(f) |
|
150 |
map = [hexlify(x) for x in hashmap] |
|
151 |
meta = {'hash':hashmap.hash()} |
|
152 |
size = hashmap.size |
|
153 |
self.backend.update_object_hashmap(self.account, self.account, container, object, size, hashmap, meta) |
|
202 |
def update_container_data(self, container, f): |
|
203 |
#just put the blocks |
|
204 |
for block in file_read_iterator(f, self.block_size): |
|
205 |
self.backend.put_block(block) |
|
154 | 206 |
|
155 | 207 |
def retrieve_container_metadata(self, container): |
156 | 208 |
return {'x-container-block-size':self.block_size, |
157 | 209 |
'x-container-block-hash':self.block_hash} |
158 |
|
|
210 |
|
|
if __name__ == "__main__":
    # Entry point: connection string for the legacy GSS database.
    old_db = ''

    migration = ObjectMigration(old_db)
    # Other migration phases, currently disabled:
    #migration.create_default_containers()
    #migration.create_objects()

    # Ad-hoc directory upload for testing.
    p = ''
    migration.upload_dir(p, p, 'chstath', 'linux')
|
171 | 220 |
|
172 | 221 |
|
Also available in: Unified diff