39 |
39 |
from binascii import hexlify
|
40 |
40 |
|
41 |
41 |
from pithos.backends.lib.hashfiler import Blocker
|
|
42 |
from pithos.backends.lib.sqlalchemy import Node
|
42 |
43 |
from pithos.aai.models import PithosUser
|
43 |
44 |
|
44 |
45 |
from django.conf import settings
|
45 |
46 |
|
46 |
|
from pithos.backends.modular import ModularBackend
|
|
47 |
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
|
|
48 |
from pithos.backends.lib.sqlalchemy.node import Node
|
|
49 |
from pithos.backends.lib.sqlalchemy.dbwrapper import DBWrapper
|
47 |
50 |
|
48 |
51 |
from lib.transfer import upload
|
49 |
|
from lib.hashmap import HashMap
|
|
52 |
from lib.hashmap import HashMap, file_read_iterator
|
50 |
53 |
from lib.client import Fault
|
51 |
54 |
from lib.migrate import Migration
|
52 |
55 |
|
... | ... | |
54 |
57 |
import os
|
55 |
58 |
import sys
|
56 |
59 |
import hashlib
|
|
60 |
import mimetypes
|
57 |
61 |
|
58 |
|
class ObjectMigration(DataMigration):
|
59 |
|
def __init__(self, db, path, block_size, hash_algorithm):
|
60 |
|
DataMigration.__init__(self, db, path, block_size, hash_algorithm)
|
61 |
|
self.wrapper = ClientWrapper()
|
62 |
|
|
|
62 |
class ObjectMigration(Migration):
|
|
63 |
def __init__(self, old_db):
|
|
64 |
Migration.__init__(self, old_db)
|
|
65 |
self.wrapper = ClientWrapper(self.backend)
|
|
66 |
params = {'wrapper': DBWrapper(self.backend.db)}
|
|
67 |
self.node = Node(**params)
|
|
68 |
|
63 |
69 |
def create_default_containers(self):
|
64 |
70 |
users = PithosUser.objects.all()
|
65 |
71 |
for u in users:
|
... | ... | |
81 |
87 |
else:
|
82 |
88 |
return '%s/%s' %(self.get_path(parent_id), foldername)
|
83 |
89 |
|
|
90 |
def create_object(self, username, container, object, filepath, mimetype):
|
|
91 |
obj = ''
|
|
92 |
path = '/'.join(object.split('/')[:-1])
|
|
93 |
name = object.split('/')[-1]
|
|
94 |
#create directory markers
|
|
95 |
for f in path.split('/'):
|
|
96 |
obj = '%s/%s' %(obj, f) if obj else f
|
|
97 |
try:
|
|
98 |
self.wrapper.create_directory_marker('pithos', obj, username)
|
|
99 |
except NameError, e:
|
|
100 |
pass
|
|
101 |
self.wrapper.set_account(username)
|
|
102 |
|
|
103 |
prefix = '%s/' %path if path else ''
|
|
104 |
print '#', filepath, container, prefix, name, mimetype
|
|
105 |
return upload(self.wrapper, filepath, container, prefix, name, mimetype)
|
|
106 |
|
|
107 |
def create_history(self, user, header_id, node_id, deleted=False):
|
|
108 |
filebody = Table('filebody', self.metadata, autoload=True)
|
|
109 |
gss_user = Table('gss_user', self.metadata, autoload=True)
|
|
110 |
j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
|
|
111 |
s = select([filebody.c.filesize, gss_user.c.username], from_obj=j)
|
|
112 |
s = s.where(filebody.c.header_id == header_id)
|
|
113 |
s = s.order_by(filebody.c.version)
|
|
114 |
rp = self.conn.execute(s)
|
|
115 |
versions = rp.fetchall()
|
|
116 |
print '#', len(versions)
|
|
117 |
rp.close()
|
|
118 |
i = 0
|
|
119 |
for size, modyfied_by in versions:
|
|
120 |
cluster = CLUSTER_HISTORY if i < len(versions) - 1 else CLUSTER_NORMAL
|
|
121 |
cluster = cluster if not deleted else CLUSTER_DELETED
|
|
122 |
args = (node_id, size, None, modyfied_by, cluster)
|
|
123 |
self.node.version_create(*args)
|
|
124 |
i += 1
|
|
125 |
|
84 |
126 |
def create_objects(self):
    """Walk every current file body in the old database, upload it into
    the new backend, then rebuild its full version history."""
    fileheader = Table('fileheader', self.metadata, autoload=True)
    filebody = Table('filebody', self.metadata, autoload=True)
    folder = Table('folder', self.metadata, autoload=True)
    gss_user = Table('gss_user', self.metadata, autoload=True)
    # join on currentbody_id: only each header's current body is
    # uploaded here; older bodies are recreated by create_history()
    j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
    j = j.join(folder, fileheader.c.folder_id == folder.c.id)
    j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
    s = select([gss_user.c.username, fileheader.c.id, fileheader.c.folder_id,
                fileheader.c.name, fileheader.c.deleted, filebody.c.storedfilepath,
                filebody.c.mimetype], from_obj=j)
    rp = self.conn.execute(s)
    objects = rp.fetchall()
    rp.close()  # release the cursor before the long upload loop
    for username, headerid, folderid, filename, deleted, filepath, mimetype in objects:
        path = self.get_path(folderid)[1:]
        # deleted files are migrated into the trash container
        container = 'pithos' if not deleted else 'trash'
        object = '%s/%s' %(path, filename)
        #filepath = '/Users/butters/Downloads/torvalds-linux-0f86267'
        vserial = self.create_object(username, container, object, filepath, mimetype)
        nodeid = self.node.version_get_properties(vserial, keys=('node',))[0]
        self.create_history(username, headerid, nodeid, deleted)
        # drop the freshly uploaded version -- presumably because
        # create_history() recreates the whole chain, current included
        self.node.version_remove(vserial)
        #self.set_metadata()
        #self.set_public()
        #self.statistics()
        #self.set_permissions()
|
|
152 |
|
|
153 |
def handle_deleted(self):
    """Placeholder for migrating deleted files -- not implemented yet."""
    pass
|
|
155 |
|
|
156 |
def upload_dir(self, dir, prefix, user, container):
|
|
157 |
for f in os.listdir(dir):
|
|
158 |
fullpath = '%s/%s' %(dir, f)
|
|
159 |
if os.path.isfile(fullpath):
|
|
160 |
type = mimetypes.guess_type(fullpath)[0]
|
|
161 |
name = '/'.join(fullpath.split(prefix)[1:])
|
|
162 |
print '@', user, container, name, fullpath, type
|
|
163 |
self.create_object(user, container, name, fullpath, type)
|
|
164 |
else: self.upload_dir(fullpath, prefix, user, container)
|
111 |
165 |
|
112 |
166 |
class ClientWrapper(object):
    """Wraps client methods used by transfer.upload()
    to ModularBackend methods"""

    def __init__(self, backend):
        # Cache the backend's block parameters so upload() can chunk
        # and hash files without touching the backend per call.
        self.backend = backend
        self.block_size = self.backend.block_size
        self.block_hash = self.backend.hash_algorithm
|
121 |
174 |
|
... | ... | |
131 |
184 |
'hash': md5.hexdigest().lower()}
|
132 |
185 |
self.backend.update_object_hashmap(account, account, container, object, 0, [], meta)
|
133 |
186 |
|
134 |
|
def create_object_by_hashmap(self, container, object, map):
|
|
187 |
def create_object_by_hashmap(self, container, object, map, mimetype=None):
|
135 |
188 |
hashmap = HashMap(self.block_size, self.block_hash)
|
136 |
|
for hash in map['hashes']:
|
137 |
|
hashmap.append(hash)
|
|
189 |
for h in map['hashes']:
|
|
190 |
hashmap.append(h)
|
138 |
191 |
meta = {'hash':hexlify(hashmap.hash())}
|
|
192 |
if mimetype:
|
|
193 |
meta['content-type'] = mimetype
|
139 |
194 |
size = map['bytes']
|
140 |
195 |
try:
|
141 |
196 |
args = [self.account, self.account, container, object, size, map['hashes'], meta]
|
142 |
|
self.backend.update_object_hashmap(*args)
|
|
197 |
return self.backend.update_object_hashmap(*args)
|
143 |
198 |
except IndexError, ie:
|
144 |
199 |
fault = Fault(ie.data, 409)
|
145 |
200 |
raise fault
|
146 |
201 |
|
147 |
|
def update_container_data(self, container, f):
    """Store the raw data of file object *f* into the backend's block
    store, one block at a time."""
    #just put the blocks
    for block in file_read_iterator(f, self.block_size):
        self.backend.put_block(block)
|
154 |
206 |
|
155 |
207 |
def retrieve_container_metadata(self, container):
    """Return the container metadata transfer.upload() inspects: only
    the block size and block hash algorithm are meaningful here."""
    return {'x-container-block-size':self.block_size,
            'x-container-block-hash':self.block_hash}
|
158 |
|
|
|
210 |
|
159 |
211 |
if __name__ == "__main__":
    # NOTE(review): placeholders -- fill in the old database DSN and the
    # directory to upload before running this script.
    old_db = ''

    ot = ObjectMigration(old_db)
    #ot.create_default_containers()
    #ot.create_objects()
    p = ''
    ot.upload_dir(p, p, 'chstath', 'linux')
|
171 |
220 |
|
172 |
221 |
|