Revision 5fdaf331 tools/migrate-db
b/tools/migrate-db | ||
---|---|---|
40 | 40 |
|
41 | 41 |
from pithos.backends.lib.hashfiler import Blocker |
42 | 42 |
from pithos.backends.lib.sqlalchemy import Node |
43 |
from pithos.aai.models import PithosUser |
|
44 | 43 |
|
45 | 44 |
from django.conf import settings |
46 | 45 |
|
47 | 46 |
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED |
48 |
from pithos.backends.lib.sqlalchemy.node import Node |
|
47 |
from pithos.backends.lib.sqlalchemy.node import Node, ROOTNODE
|
|
49 | 48 |
|
50 | 49 |
from lib.transfer import upload |
51 | 50 |
from lib.hashmap import HashMap, file_read_iterator |
52 | 51 |
from lib.client import Fault |
53 | 52 |
from lib.migrate import Migration, Cache |
54 | 53 |
from calendar import timegm |
54 |
from decimal import Decimal |
|
55 |
from collections import defaultdict |
|
55 | 56 |
|
56 | 57 |
import json |
57 | 58 |
import os |
58 | 59 |
import sys |
59 | 60 |
import hashlib |
60 | 61 |
import mimetypes |
62 |
import time |
|
63 |
import datetime |
|
64 |
|
|
65 |
(ID, CREATIONDATE, MODIFICATIONDATE, DELETED, ICON, NAME, VERSION, CREATEDBY_ID, MODIFIEDBY_ID, OWNER_ID, PARENT_ID, READFORALL, SHARED, USER) = range(14) |
|
61 | 66 |
|
62 | 67 |
class ObjectMigration(Migration): |
63 | 68 |
def __init__(self, old_db, db, f): |
64 | 69 |
Migration.__init__(self, old_db) |
65 | 70 |
self.cache = Cache(db) |
66 | 71 |
|
67 |
def create_node(self, username, container, object, filepath, mimetype): |
|
68 |
obj = '' |
|
69 |
path = '/'.join(object.split('/')[:-1]) |
|
70 |
name = object.split('/')[-1] |
|
71 |
#create directory markers |
|
72 |
for f in path.split('/'): |
|
73 |
obj = '%s/%s' %(obj, f) if obj else f |
|
74 |
try: |
|
75 |
md5 = hashlib.md5() |
|
76 |
meta = {'Content-Type':'application/directory', |
|
77 |
'hash': md5.hexdigest().lower()} |
|
78 |
self.backend.update_object_hashmap(username, username, container, obj, 0, [], meta) |
|
79 |
except NameError, e: |
|
80 |
pass |
|
81 |
|
|
82 |
parent_path = '%s/%s' %(username, container) |
|
83 |
parent_node = self.backend.node.node_lookup(parent_path) |
|
84 |
path = '%s/%s' %(parent_path, object) |
|
85 |
nodeid = self.backend.node.node_create(parent_node, path) |
|
86 |
return nodeid |
|
72 |
def create_node(self, username, container, object): |
|
73 |
node = self.backend.node.node_lookup(object) |
|
74 |
if not node: |
|
75 |
parent_path = '%s/%s' %(username, container) |
|
76 |
parent_node = self.backend.node.node_lookup(parent_path) |
|
77 |
if not parent_node: |
|
78 |
raise Exception('Missing node') |
|
79 |
node = self.backend.node.node_create(parent_node, object) |
|
80 |
return node |
|
87 | 81 |
|
88 | 82 |
def create_history(self, header_id, node_id, deleted=False): |
89 | 83 |
i = 0 |
90 | 84 |
map = HashMap(self.backend.block_size, self.backend.hash_algorithm) |
85 |
v = [] |
|
86 |
stored_versions = self.backend.node.node_get_versions(node_id, ['mtime']) |
|
87 |
stored_versions_mtime = [datetime.datetime.utcfromtimestamp(elem[0]) for elem in stored_versions] |
|
91 | 88 |
for t, rowcount in self.retrieve_node_versions(header_id): |
92 |
size, modyfied_by, filepath, mimetype, modificationdate = t |
|
89 |
size, modyfied_by, filepath, mimetype, mdate = t |
|
90 |
if mdate in stored_versions_mtime: |
|
91 |
continue |
|
93 | 92 |
cluster = CLUSTER_HISTORY if i < rowcount - 1 else CLUSTER_NORMAL |
94 | 93 |
cluster = cluster if not deleted else CLUSTER_DELETED |
95 | 94 |
hash = self.cache.get(filepath) |
96 | 95 |
if hash == None: |
97 |
raise Exception("Missing hash") |
|
98 |
args = (node_id, hash, size, None, modyfied_by, cluster) |
|
99 |
serial = self.backend.node.version_create(*args)[0] |
|
100 |
meta = {'hash':hash, |
|
101 |
'content-type':mimetype} |
|
102 |
self.backend.node.attribute_set(serial, ((k, v) for k, v in meta.iteritems())) |
|
103 |
timestamp = timegm(modificationdate.timetuple()) |
|
104 |
microseconds = modificationdate.time().microsecond |
|
105 |
f.write('update versions set mtime=\'%10d.%6d\' where serial=%s;' %(timestamp, microseconds, serial)) |
|
96 |
raise Exception("Missing hash") |
|
97 |
args = node_id, hash, size, modyfied_by, cluster, mimetype, mdate |
|
98 |
v.append(self.create_version(*args)) |
|
106 | 99 |
i += 1 |
100 |
return v |
|
101 |
|
|
102 |
def create_version(self, node_id, hash, size, modyfied_by, cluster, mimetype, mdate): |
|
103 |
args = (node_id, hash, size, None, modyfied_by, cluster) |
|
104 |
serial = self.backend.node.version_create(*args)[0] |
|
105 |
meta = {'hash':hash, |
|
106 |
'content-type':mimetype} |
|
107 |
self.backend.node.attribute_set(serial, ((k, v) for k, v in meta.iteritems())) |
|
108 |
timestamp = timegm(mdate.timetuple()) |
|
109 |
microseconds = mdate.time().microsecond |
|
110 |
values = timestamp, microseconds, serial |
|
111 |
f.write('update versions set mtime=\'%10d.%6d\' where serial=%s;' %values) |
|
112 |
return serial |
|
113 |
|
|
114 |
def create_tags(self, header_id, node_id, vserials): |
|
115 |
tags = self.retrieve_tags(header_id) |
|
116 |
if not tags: |
|
117 |
return |
|
118 |
for v in vserials: |
|
119 |
self.backend.node.attribute_set(v, (('tag', tags),)) |
|
107 | 120 |
|
108 |
def create_metadata(self, header_id, node_id): |
|
109 |
for t in self.retrieve_metadata(header_id): |
|
110 |
pass |
|
121 |
def create_permissions(self, fid, path, owner): |
|
122 |
fpath, permissions = self.backend.permissions.access_inherit(path) |
|
123 |
if not permissions: |
|
124 |
permissions = self.retrieve_permissions(fid) |
|
125 |
keys = ('read', 'write') |
|
126 |
for k in keys: |
|
127 |
if owner in permissions[k]: |
|
128 |
permissions[k].remove(owner) |
|
129 |
self.backend.permissions.access_set(path, permissions) |
|
111 | 130 |
|
112 | 131 |
def create_objects(self): |
113 |
for username, headerid, folderid, filename, deleted, filepath, mimetype in self.retrieve_current_nodes(): |
|
114 |
path = self.retrieve_path(folderid)[1:] |
|
115 |
container = 'pithos' if not deleted else 'trash' |
|
132 |
for t in self.retrieve_current_nodes(): |
|
133 |
username, headerid, folderid, filename, deleted, filepath, mimetype, public, owner_id = t |
|
134 |
containers = ['pithos', 'trash'] |
|
135 |
|
|
136 |
for c in containers: |
|
137 |
#create container if it does not exist |
|
138 |
try: |
|
139 |
self.backend._lookup_container(username, c) |
|
140 |
except NameError, e: |
|
141 |
self.backend.put_container(username, username, c) |
|
116 | 142 |
|
117 |
#create container if it does not exist |
|
118 |
try: |
|
119 |
self.backend._lookup_container(username, container) |
|
120 |
except NameError: |
|
121 |
self.backend.put_container(username, username, container) |
|
143 |
container = 'pithos' if not deleted else 'trash' |
|
144 |
path = self.build_path(folderid) |
|
122 | 145 |
|
123 | 146 |
#create node |
124 |
object = '%s/%s' %(path, filename) |
|
125 |
nodeid = self.create_node(username, container, object, filepath, mimetype) |
|
147 |
object = '%s/%s/%s/%s' %(username, container, path, filename) |
|
148 |
args = username, container, object, filepath, mimetype |
|
149 |
nodeid = self.create_node(*args) |
|
126 | 150 |
|
127 |
#create node history |
|
128 |
self.create_history(headerid, nodeid, deleted) |
|
151 |
#create node history
|
|
152 |
vserials = self.create_history(headerid, nodeid, deleted)
|
|
129 | 153 |
|
130 |
self.create_metadata(headerid, nodeid) |
|
131 |
#self.set_public() |
|
132 |
#self.statistics() |
|
133 |
#self.set_permissions() |
|
154 |
#set object tags |
|
155 |
self.create_tags(headerid, nodeid, vserials) |
|
156 |
|
|
157 |
#set object's publicity |
|
158 |
if public: |
|
159 |
self.backend.permissions.public_set(object) |
|
160 |
|
|
161 |
#set object's permissions |
|
162 |
self.create_permissions(headerid, object, username) |
|
134 | 163 |
|
135 |
def retrieve_path(self, child_id): |
|
136 |
folderTable = Table('folder', self.metadata, autoload=True) |
|
137 |
s = select([folderTable.c.parent_id, folderTable.c.name]) |
|
138 |
s = s.where(folderTable.c.id == child_id) |
|
164 |
def build_path(self, child_id): |
|
165 |
folder = Table('folder', self.metadata, autoload=True) |
|
166 |
user = Table('gss_user', self.metadata, autoload=True) |
|
167 |
j = folder.join(user, folder.c.owner_id == user.c.id) |
|
168 |
s = select([folder, user.c.username], from_obj=j) |
|
169 |
s = s.where(folder.c.id == child_id) |
|
170 |
s.order_by(folder.c.modificationdate) |
|
139 | 171 |
rp = self.conn.execute(s) |
140 |
parent_id, foldername = rp.fetchone() |
|
141 |
if not parent_id: |
|
172 |
t = rp.fetchone() |
|
173 |
md5 = hashlib.md5() |
|
174 |
hash = md5.hexdigest().lower() |
|
175 |
size = 0 |
|
176 |
if not t[PARENT_ID]: |
|
142 | 177 |
return '' |
143 | 178 |
else: |
144 |
return '%s/%s' %(self.retrieve_path(parent_id), foldername) |
|
179 |
container_path = t[USER] |
|
180 |
container_path += '/trash' if t[DELETED] else '/pithos' |
|
181 |
parent_node = self.backend.node.node_lookup(container_path) |
|
182 |
if not parent_node: |
|
183 |
raise Exception('Missing node:', container_path) |
|
184 |
parent_path = self.build_path(t[PARENT_ID]) |
|
185 |
path = '%s/%s/%s' %(container_path, parent_path, t[NAME]) if parent_path else '%s/%s' %(container_path, t[NAME]) |
|
186 |
node = self.backend.node.node_lookup(path) |
|
187 |
if not node: |
|
188 |
node = self.backend.node.node_create(parent_node, path) |
|
189 |
if not node: |
|
190 |
raise Exception('Unable to create node:', path) |
|
191 |
|
|
192 |
#create versions |
|
193 |
v = self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[CREATIONDATE]) |
|
194 |
if t[CREATIONDATE] != t[MODIFICATIONDATE]: |
|
195 |
self.backend.node.version_recluster(v, CLUSTER_HISTORY) |
|
196 |
self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[MODIFICATIONDATE]) |
|
197 |
|
|
198 |
#set permissions |
|
199 |
self.create_permissions(t[ID], path, t[USER]) |
|
200 |
return '%s/%s' %(parent_path, t[NAME]) if parent_path else t[NAME] |
|
145 | 201 |
|
146 | 202 |
def retrieve_current_nodes(self): |
147 | 203 |
fileheader = Table('fileheader', self.metadata, autoload=True) |
... | ... | |
152 | 208 |
j = j.join(folder, fileheader.c.folder_id == folder.c.id) |
153 | 209 |
j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id) |
154 | 210 |
s = select([gss_user.c.username, fileheader.c.id, fileheader.c.folder_id, |
155 |
fileheader.c.name, fileheader.c.deleted, filebody.c.storedfilepath,
|
|
156 |
filebody.c.mimetype], from_obj=j)
|
|
157 |
s = s.limit(1)
|
|
211 |
fileheader.c.name, fileheader.c.deleted, |
|
212 |
filebody.c.storedfilepath, filebody.c.mimetype,
|
|
213 |
fileheader.c.readforall, fileheader.c.owner_id], from_obj=j)
|
|
158 | 214 |
rp = self.conn.execute(s) |
159 | 215 |
object = rp.fetchone() |
160 | 216 |
while object: |
... | ... | |
178 | 234 |
version = rp.fetchone() |
179 | 235 |
rp.close() |
180 | 236 |
|
181 |
def retrieve_metadata(self, header_id):
|
|
237 |
def retrieve_tags(self, header_id):
|
|
182 | 238 |
filetag = Table('filetag', self.metadata, autoload=True) |
183 |
s = filetag.select(filetag.c.fileid == header_id)
|
|
239 |
s = select([filetag.c.tag], filetag.c.fileid == header_id)
|
|
184 | 240 |
rp = self.conn.execute(s) |
185 |
tag = rp.fetchone() |
|
186 |
while tag: |
|
187 |
yield tag |
|
188 |
tag = tp.fetchone() |
|
241 |
tags = rp.fetchall() if rp.returns_rows else [] |
|
242 |
tags = [elem[0] for elem in tags] |
|
189 | 243 |
rp.close() |
244 |
return ','.join(tags) if tags else '' |
|
190 | 245 |
|
191 |
def handle_deleted(self): |
|
192 |
pass |
|
246 |
def retrieve_permissions(self, id, is_folder=True): |
|
247 |
permissions = {} |
|
248 |
if is_folder: |
|
249 |
ftable = Table('folder_permission', self.metadata, autoload=True) |
|
250 |
else: |
|
251 |
ftable = Table('fileheader_permission', self.metadata, autoload=True) |
|
252 |
permission = Table('permission', self.metadata, autoload=True) |
|
253 |
group = Table('gss_group', self.metadata, autoload=True) |
|
254 |
user = Table('gss_user', self.metadata, autoload=True) |
|
255 |
j = ftable.join(permission, ftable.c.permissions_id == permission.c.id) |
|
256 |
j1 = j.join(group, group.c.id == permission.c.group_id) |
|
257 |
j2 = j.join(user, user.c.id == permission.c.user_id) |
|
258 |
|
|
259 |
permissions = defaultdict(list) |
|
260 |
|
|
261 |
#get folder read groups |
|
262 |
s = select([group.c.name], from_obj=j1) |
|
263 |
if is_folder: |
|
264 |
s = s.where(ftable.c.folder_id == id) |
|
265 |
else: |
|
266 |
s = s.where(ftable.c.fileheader_id == id) |
|
267 |
s = s.where(permission.c.read == True) |
|
268 |
s = s.where(permission.c.group_id != None) |
|
269 |
rp = self.conn.execute(s) |
|
270 |
permissions['read'].extend([e[0] for e in rp.fetchall()]) |
|
193 | 271 |
|
272 |
#get folder read users |
|
273 |
s = select([user.c.username], from_obj=j2) |
|
274 |
if is_folder: |
|
275 |
s = s.where(ftable.c.folder_id == id) |
|
276 |
else: |
|
277 |
s = s.where(ftable.c.fileheader_id == id) |
|
278 |
s = s.where(permission.c.read == True) |
|
279 |
s = s.where(permission.c.user_id != None) |
|
280 |
rp = self.conn.execute(s) |
|
281 |
permissions['read'].extend([e[0] for e in rp.fetchall()]) |
|
282 |
|
|
283 |
#get folder write groups |
|
284 |
s = select([group.c.name], from_obj=j1) |
|
285 |
if is_folder: |
|
286 |
s = s.where(ftable.c.folder_id == id) |
|
287 |
else: |
|
288 |
s = s.where(ftable.c.fileheader_id == id) |
|
289 |
s = s.where(permission.c.write == True) |
|
290 |
s = s.where(permission.c.group_id != None) |
|
291 |
rp = self.conn.execute(s) |
|
292 |
permissions['write'].extend([e[0] for e in rp.fetchall()]) |
|
293 |
|
|
294 |
#get folder write groups |
|
295 |
s = select([user.c.username], from_obj=j2) |
|
296 |
if is_folder: |
|
297 |
s = s.where(ftable.c.folder_id == id) |
|
298 |
else: |
|
299 |
s = s.where(ftable.c.fileheader_id == id) |
|
300 |
s = s.where(permission.c.write == True) |
|
301 |
s = s.where(permission.c.user_id != None) |
|
302 |
rp = self.conn.execute(s) |
|
303 |
permissions['write'].extend([e[0] for e in rp.fetchall()]) |
|
304 |
|
|
305 |
rp.close() |
|
306 |
return permissions |
|
307 |
|
|
194 | 308 |
if __name__ == "__main__": |
195 | 309 |
old_db = '' |
196 | 310 |
db = '' |
Also available in: Unified diff