root / tools / migrate-db @ 5fdaf331
History | View | Annotate | Download (13 kB)
1 |
#!/usr/bin/env python |
---|---|
2 |
|
3 |
# Copyright 2011 GRNET S.A. All rights reserved. |
4 |
# |
5 |
# Redistribution and use in source and binary forms, with or |
6 |
# without modification, are permitted provided that the following |
7 |
# conditions are met: |
8 |
# |
9 |
# 1. Redistributions of source code must retain the above |
10 |
# copyright notice, this list of conditions and the following |
11 |
# disclaimer. |
12 |
# |
13 |
# 2. Redistributions in binary form must reproduce the above |
14 |
# copyright notice, this list of conditions and the following |
15 |
# disclaimer in the documentation and/or other materials |
16 |
# provided with the distribution. |
17 |
# |
18 |
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS |
19 |
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 |
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
21 |
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR |
22 |
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 |
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 |
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
25 |
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
26 |
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 |
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
28 |
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 |
# POSSIBILITY OF SUCH DAMAGE. |
30 |
# |
31 |
# The views and conclusions contained in the software and |
32 |
# documentation are those of the authors and should not be |
33 |
# interpreted as representing official policies, either expressed |
34 |
# or implied, of GRNET S.A. |
35 |
|
36 |
from sqlalchemy import Table |
37 |
from sqlalchemy.sql import select |
38 |
|
39 |
from binascii import hexlify |
40 |
|
41 |
from pithos.backends.lib.hashfiler import Blocker |
42 |
from pithos.backends.lib.sqlalchemy import Node |
43 |
|
44 |
from django.conf import settings |
45 |
|
46 |
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED |
47 |
from pithos.backends.lib.sqlalchemy.node import Node, ROOTNODE |
48 |
|
49 |
from lib.transfer import upload |
50 |
from lib.hashmap import HashMap, file_read_iterator |
51 |
from lib.client import Fault |
52 |
from lib.migrate import Migration, Cache |
53 |
from calendar import timegm |
54 |
from decimal import Decimal |
55 |
from collections import defaultdict |
56 |
|
57 |
import json |
58 |
import os |
59 |
import sys |
60 |
import hashlib |
61 |
import mimetypes |
62 |
import time |
63 |
import datetime |
64 |
|
65 |
# Column indices into rows fetched by the folder/gss_user join in
# build_path(): the 'folder' table columns in declaration order,
# followed by gss_user.username (USER).
(ID, CREATIONDATE, MODIFICATIONDATE, DELETED, ICON, NAME, VERSION, CREATEDBY_ID, MODIFIEDBY_ID, OWNER_ID, PARENT_ID, READFORALL, SHARED, USER) = range(14)
66 |
|
67 |
class ObjectMigration(Migration):
    """Migrate legacy GSS files and folders into the Pithos backend.

    Reads rows from the old GSS database (fileheader / filebody / folder /
    gss_user tables) and recreates the corresponding nodes, versions, tags
    and permissions through the Pithos backend API.  Version mtimes cannot
    be set through the API, so correcting SQL statements are appended to
    the file handle given at construction time for later replay.
    """

    def __init__(self, old_db, db, f):
        """Bind to the legacy DB (old_db), the hash cache DB (db) and an
        open writable file handle (f) that receives mtime-fix SQL."""
        Migration.__init__(self, old_db)
        self.cache = Cache(db)
        # BUG FIX: the handle was previously ignored and create_version()
        # silently relied on a module-level global named 'f'.
        self.f = f

    def create_node(self, username, container, object):
        """Return the node id for *object*, creating it under the
        username/container node when it does not already exist.

        Raises Exception if the container node itself is missing.
        """
        node = self.backend.node.node_lookup(object)
        if not node:
            parent_path = '%s/%s' % (username, container)
            parent_node = self.backend.node.node_lookup(parent_path)
            if not parent_node:
                raise Exception('Missing node')
            node = self.backend.node.node_create(parent_node, object)
        return node

    def create_history(self, header_id, node_id, deleted=False):
        """Create a version for every legacy filebody row of *header_id*
        that is not already stored for *node_id*; return the new serials.

        Versions whose modification date already exists on the node are
        skipped.  All but the newest version land in CLUSTER_HISTORY
        (or CLUSTER_DELETED when the object is trashed).
        """
        i = 0
        v = []
        stored_versions = self.backend.node.node_get_versions(node_id, ['mtime'])
        stored_versions_mtime = [datetime.datetime.utcfromtimestamp(elem[0])
                                 for elem in stored_versions]
        for t, rowcount in self.retrieve_node_versions(header_id):
            size, modyfied_by, filepath, mimetype, mdate = t
            if mdate in stored_versions_mtime:
                continue
            # NOTE(review): i counts only non-skipped rows, so if earlier
            # versions were already stored the newest row can still be
            # classified as history -- confirm against the legacy data.
            cluster = CLUSTER_HISTORY if i < rowcount - 1 else CLUSTER_NORMAL
            cluster = cluster if not deleted else CLUSTER_DELETED
            hash = self.cache.get(filepath)
            if hash is None:
                raise Exception("Missing hash")
            args = node_id, hash, size, modyfied_by, cluster, mimetype, mdate
            v.append(self.create_version(*args))
            i += 1
        return v

    def create_version(self, node_id, hash, size, modyfied_by, cluster, mimetype, mdate):
        """Create one version with hash/content-type attributes and queue
        an SQL statement restoring its legacy mtime; return the serial."""
        args = (node_id, hash, size, None, modyfied_by, cluster)
        serial = self.backend.node.version_create(*args)[0]
        meta = {'hash': hash,
                'content-type': mimetype}
        self.backend.node.attribute_set(serial, meta.items())
        # version_create() stamps the current time; emit SQL that resets
        # the row to the original legacy modification date.
        timestamp = timegm(mdate.timetuple())
        microseconds = mdate.time().microsecond
        values = timestamp, microseconds, serial
        self.f.write('update versions set mtime=\'%10d.%6d\' where serial=%s;' % values)
        return serial

    def create_tags(self, header_id, node_id, vserials):
        """Attach the legacy tags of *header_id* (comma-joined) as a 'tag'
        attribute on every serial in *vserials*; no-op when untagged."""
        tags = self.retrieve_tags(header_id)
        if not tags:
            return
        for v in vserials:
            self.backend.node.attribute_set(v, (('tag', tags),))

    def create_permissions(self, fid, path, owner):
        """Set read/write permissions on *path*: inherited ones when
        present, otherwise the legacy permissions of *fid*.  The owner is
        stripped from both lists since ownership implies full access."""
        fpath, permissions = self.backend.permissions.access_inherit(path)
        if not permissions:
            permissions = self.retrieve_permissions(fid)
        keys = ('read', 'write')
        for k in keys:
            if owner in permissions[k]:
                permissions[k].remove(owner)
        self.backend.permissions.access_set(path, permissions)

    def create_objects(self):
        """Main driver: walk every current legacy file and recreate its
        node, version history, tags, publicity and permissions."""
        for t in self.retrieve_current_nodes():
            username, headerid, folderid, filename, deleted, filepath, mimetype, public, owner_id = t
            containers = ['pithos', 'trash']

            for c in containers:
                # Create the container if it does not exist yet.
                try:
                    self.backend._lookup_container(username, c)
                except NameError:
                    self.backend.put_container(username, username, c)

            container = 'pithos' if not deleted else 'trash'
            path = self.build_path(folderid)

            # Create the object node.
            object = '%s/%s/%s/%s' % (username, container, path, filename)
            # BUG FIX: create_node() takes three arguments besides self;
            # the original passed filepath and mimetype as extra
            # positionals, which raises TypeError.
            nodeid = self.create_node(username, container, object)

            # Create the node's version history.
            vserials = self.create_history(headerid, nodeid, deleted)

            # Set object tags.
            self.create_tags(headerid, nodeid, vserials)

            # Set the object's publicity.
            if public:
                self.backend.permissions.public_set(object)

            # Set the object's permissions.
            self.create_permissions(headerid, object, username)

    def build_path(self, child_id):
        """Recursively materialize the folder chain ending at *child_id*
        as directory objects, and return the folder path relative to its
        container ('' for container roots)."""
        folder = Table('folder', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        j = folder.join(user, folder.c.owner_id == user.c.id)
        s = select([folder, user.c.username], from_obj=j)
        s = s.where(folder.c.id == child_id)
        # BUG FIX: order_by() is generative and returns a new select;
        # the original discarded its result, so it had no effect.
        s = s.order_by(folder.c.modificationdate)
        rp = self.conn.execute(s)
        t = rp.fetchone()
        # Directories are stored as zero-length objects: MD5 of empty
        # content, size 0.
        md5 = hashlib.md5()
        hash = md5.hexdigest().lower()
        size = 0
        if not t[PARENT_ID]:
            return ''
        else:
            container_path = t[USER]
            container_path += '/trash' if t[DELETED] else '/pithos'
            parent_node = self.backend.node.node_lookup(container_path)
            if not parent_node:
                raise Exception('Missing node:', container_path)
            parent_path = self.build_path(t[PARENT_ID])
            path = '%s/%s/%s' % (container_path, parent_path, t[NAME]) if parent_path else '%s/%s' % (container_path, t[NAME])
            node = self.backend.node.node_lookup(path)
            if not node:
                node = self.backend.node.node_create(parent_node, path)
                if not node:
                    raise Exception('Unable to create node:', path)

                # Create versions: one for the creation date and, when the
                # folder was later modified, a second one that supersedes it.
                v = self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[CREATIONDATE])
                if t[CREATIONDATE] != t[MODIFICATIONDATE]:
                    self.backend.node.version_recluster(v, CLUSTER_HISTORY)
                    self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[MODIFICATIONDATE])

                # Set permissions.
                self.create_permissions(t[ID], path, t[USER])
            return '%s/%s' % (parent_path, t[NAME]) if parent_path else t[NAME]

    def retrieve_current_nodes(self):
        """Yield one row per current legacy file: (username, header id,
        folder id, name, deleted flag, stored path, mimetype, readforall,
        owner id)."""
        fileheader = Table('fileheader', self.metadata, autoload=True)
        filebody = Table('filebody', self.metadata, autoload=True)
        folder = Table('folder', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
        s = select([gss_user.c.username, fileheader.c.id, fileheader.c.folder_id,
                    fileheader.c.name, fileheader.c.deleted,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    fileheader.c.readforall, fileheader.c.owner_id], from_obj=j)
        rp = self.conn.execute(s)
        object = rp.fetchone()
        while object:
            yield object
            object = rp.fetchone()
        rp.close()

    def retrieve_node_versions(self, header_id):
        """Yield ((size, username, stored path, mimetype, mdate), rowcount)
        for every filebody row of *header_id*, oldest version first."""
        filebody = Table('filebody', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
        s = select([filebody.c.filesize, gss_user.c.username,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    filebody.c.modificationdate], from_obj=j)
        s = s.where(filebody.c.header_id == header_id)
        s = s.order_by(filebody.c.version)
        rp = self.conn.execute(s)
        version = rp.fetchone()
        while version:
            yield version, rp.rowcount
            version = rp.fetchone()
        rp.close()

    def retrieve_tags(self, header_id):
        """Return the tags of *header_id* as a comma-joined string
        ('' when there are none)."""
        filetag = Table('filetag', self.metadata, autoload=True)
        s = select([filetag.c.tag], filetag.c.fileid == header_id)
        rp = self.conn.execute(s)
        tags = rp.fetchall() if rp.returns_rows else []
        tags = [elem[0] for elem in tags]
        rp.close()
        return ','.join(tags) if tags else ''

    def retrieve_permissions(self, id, is_folder=True):
        """Return {'read': [...], 'write': [...]} for the folder (default)
        or fileheader identified by *id*, mixing group names and
        usernames in each list."""
        if is_folder:
            ftable = Table('folder_permission', self.metadata, autoload=True)
        else:
            ftable = Table('fileheader_permission', self.metadata, autoload=True)
        permission = Table('permission', self.metadata, autoload=True)
        group = Table('gss_group', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        j = ftable.join(permission, ftable.c.permissions_id == permission.c.id)
        j1 = j.join(group, group.c.id == permission.c.group_id)
        j2 = j.join(user, user.c.id == permission.c.user_id)

        permissions = defaultdict(list)

        # Get read groups.
        s = select([group.c.name], from_obj=j1)
        if is_folder:
            s = s.where(ftable.c.folder_id == id)
        else:
            s = s.where(ftable.c.fileheader_id == id)
        s = s.where(permission.c.read == True)
        s = s.where(permission.c.group_id != None)
        rp = self.conn.execute(s)
        permissions['read'].extend([e[0] for e in rp.fetchall()])

        # Get read users.
        s = select([user.c.username], from_obj=j2)
        if is_folder:
            s = s.where(ftable.c.folder_id == id)
        else:
            s = s.where(ftable.c.fileheader_id == id)
        s = s.where(permission.c.read == True)
        s = s.where(permission.c.user_id != None)
        rp = self.conn.execute(s)
        permissions['read'].extend([e[0] for e in rp.fetchall()])

        # Get write groups.
        s = select([group.c.name], from_obj=j1)
        if is_folder:
            s = s.where(ftable.c.folder_id == id)
        else:
            s = s.where(ftable.c.fileheader_id == id)
        s = s.where(permission.c.write == True)
        s = s.where(permission.c.group_id != None)
        rp = self.conn.execute(s)
        permissions['write'].extend([e[0] for e in rp.fetchall()])

        # Get write users.
        s = select([user.c.username], from_obj=j2)
        if is_folder:
            s = s.where(ftable.c.folder_id == id)
        else:
            s = s.where(ftable.c.fileheader_id == id)
        s = s.where(permission.c.write == True)
        s = s.where(permission.c.user_id != None)
        rp = self.conn.execute(s)
        permissions['write'].extend([e[0] for e in rp.fetchall()])

        rp.close()
        return permissions
307 |
|
308 |
if __name__ == "__main__":
    old_db = ''
    db = ''

    # Collected mtime corrections are written as SQL for later replay.
    # BUG FIX: use a context manager so the file is closed (and flushed)
    # even when the migration raises.
    with open('fixdates.sql', 'w') as f:
        ot = ObjectMigration(old_db, db, f)
        ot.create_objects()
316 |
|
317 |
|