root / tools / migrate-db @ 317c3efc
History | View | Annotate | Download (13.1 kB)
1 |
#!/usr/bin/env python |
---|---|
2 |
|
3 |
# Copyright 2011 GRNET S.A. All rights reserved. |
4 |
# |
5 |
# Redistribution and use in source and binary forms, with or |
6 |
# without modification, are permitted provided that the following |
7 |
# conditions are met: |
8 |
# |
9 |
# 1. Redistributions of source code must retain the above |
10 |
# copyright notice, this list of conditions and the following |
11 |
# disclaimer. |
12 |
# |
13 |
# 2. Redistributions in binary form must reproduce the above |
14 |
# copyright notice, this list of conditions and the following |
15 |
# disclaimer in the documentation and/or other materials |
16 |
# provided with the distribution. |
17 |
# |
18 |
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS |
19 |
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 |
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
21 |
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR |
22 |
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 |
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 |
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
25 |
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
26 |
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 |
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
28 |
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 |
# POSSIBILITY OF SUCH DAMAGE. |
30 |
# |
31 |
# The views and conclusions contained in the software and |
32 |
# documentation are those of the authors and should not be |
33 |
# interpreted as representing official policies, either expressed |
34 |
# or implied, of GRNET S.A. |
35 |
|
36 |
from sqlalchemy import Table |
37 |
from sqlalchemy.sql import select |
38 |
|
39 |
from binascii import hexlify |
40 |
|
41 |
from pithos.backends.lib.hashfiler import Blocker |
42 |
from pithos.backends.lib.sqlalchemy import Node |
43 |
|
44 |
from django.conf import settings |
45 |
|
46 |
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED |
47 |
from pithos.backends.lib.sqlalchemy.node import Node, ROOTNODE |
48 |
|
49 |
from lib.transfer import upload |
50 |
from lib.hashmap import HashMap, file_read_iterator |
51 |
from lib.client import Fault |
52 |
from lib.migrate import Migration, Cache |
53 |
from calendar import timegm |
54 |
from decimal import Decimal |
55 |
from collections import defaultdict |
56 |
|
57 |
import json |
58 |
import os |
59 |
import sys |
60 |
import hashlib |
61 |
import mimetypes |
62 |
import time |
63 |
import datetime |
64 |
|
65 |
# Column indices into the rows produced by the folder/gss_user join in
# ObjectMigration.build_path() (select([folder, user.c.username])): the
# folder table's columns in declaration order, followed by the username.
(ID, CREATIONDATE, MODIFICATIONDATE, DELETED, ICON, NAME, VERSION, CREATEDBY_ID, MODIFIEDBY_ID, OWNER_ID, PARENT_ID, READFORALL, SHARED, USER) = range(14)
66 |
|
67 |
class ObjectMigration(Migration):
    """Migrate GSS file objects from the old database into Pithos.

    Walks the old schema (fileheader / filebody / folder / gss_user),
    recreates the node hierarchy, version history, tags, publicity and
    permissions in the Pithos backend, and appends mtime-fixup SQL
    statements to the output stream supplied at construction time.
    """

    def __init__(self, old_db, db, f):
        Migration.__init__(self, old_db)
        self.cache = Cache(db)
        # Output stream for the generated mtime-fixup SQL.
        # (Previously create_version wrote to a module-global `f`.)
        self.f = f

    def create_node(self, username, container, object):
        """Return the node id for *object*, creating the node under the
        user's container node when it does not exist yet.

        Raises Exception if the container node itself is missing.
        """
        node = self.backend.node.node_lookup(object)
        if not node:
            parent_path = '%s/%s' % (username, container)
            parent_node = self.backend.node.node_lookup(parent_path)
            if not parent_node:
                raise Exception('Missing node')
            node = self.backend.node.node_create(parent_node, object)
        return node

    def create_history(self, header_id, node_id, deleted=False):
        """Replay the old filebody versions of *header_id* onto *node_id*.

        Versions whose modification date already exists on the node are
        skipped.  All but the newest version land in the history cluster
        (or the deleted cluster when *deleted* is true).  Returns the
        list of created version serials.
        """
        i = 0
        v = []
        stored_versions = self.backend.node.node_get_versions(node_id, ['mtime'])
        stored_versions_mtime = [datetime.datetime.utcfromtimestamp(elem[0])
                                 for elem in stored_versions]
        for t, rowcount in self.retrieve_node_versions(header_id):
            size, modified_by, filepath, mimetype, mdate = t
            if mdate in stored_versions_mtime:
                continue
            # Only the newest row is the "normal" version; earlier ones
            # become history (everything is "deleted" for trashed files).
            cluster = CLUSTER_HISTORY if i < rowcount - 1 else CLUSTER_NORMAL
            cluster = cluster if not deleted else CLUSTER_DELETED
            hash = self.cache.get(filepath)
            if hash is None:
                raise Exception("Missing hash")
            args = node_id, hash, size, modified_by, cluster, mimetype, mdate
            v.append(self.create_version(*args))
            i += 1
        return v

    def create_version(self, node_id, hash, size, modyfied_by, cluster, mimetype, mdate):
        """Create one version on *node_id* and emit an SQL statement (to
        self.f) that fixes its mtime to the original *mdate*.

        Returns the new version serial.  (The *modyfied_by* spelling is
        kept for signature compatibility.)
        """
        args = (node_id, hash, size, None, modyfied_by, cluster)
        serial = self.backend.node.version_create(*args)[0]
        meta = {'hash': hash,
                'content-type': mimetype}
        self.backend.node.attribute_set(serial, ((k, v) for k, v in meta.iteritems()))
        timestamp = timegm(mdate.timetuple())
        microseconds = mdate.time().microsecond
        values = timestamp, microseconds, serial
        # Zero-pad the fractional part: the previous '%6d' produced
        # space-padded microseconds, yielding an invalid decimal literal.
        self.f.write('update versions set mtime=\'%10d.%06d\' where serial=%s;' % values)
        return serial

    def create_tags(self, header_id, node_id, vserials):
        """Copy the old file tags (comma-joined) onto every migrated version."""
        tags = self.retrieve_tags(header_id)
        if not tags:
            return
        for v in vserials:
            self.backend.node.attribute_set(v, (('X-Object-Meta-Tag', tags),))

    def create_permissions(self, fid, path, owner):
        """Set access permissions on *path*.

        Inherited permissions win; otherwise the permissions stored in the
        old database for *fid* are used.  The owner is dropped from the
        explicit read/write lists (ownership is implicit in Pithos).
        """
        fpath, permissions = self.backend.permissions.access_inherit(path)
        if not permissions:
            # NOTE(review): retrieve_permissions defaults to is_folder=True,
            # yet this method is also called with fileheader ids from
            # create_objects -- confirm the intended table is queried.
            permissions = self.retrieve_permissions(fid)
        keys = ('read', 'write')
        for k in keys:
            if owner in permissions[k]:
                permissions[k].remove(owner)
        self.backend.permissions.access_set(path, permissions)

    def create_objects(self):
        """Migrate every current file into 'pithos' (or 'trash' when
        deleted), recreating history, tags, publicity and permissions."""
        for t in self.retrieve_current_nodes():
            username, headerid, folderid, filename, deleted, filepath, mimetype, public, owner_id = t
            containers = ['pithos', 'trash']

            for c in containers:
                # create container if it does not exist
                try:
                    self.backend._lookup_container(username, c)
                except NameError:
                    self.backend.put_container(username, username, c)

            container = 'pithos' if not deleted else 'trash'
            path = self.build_path(folderid)
            # create node
            object = '%s/%s' % (username, container)
            object = '%s/%s/%s' % (object, path, filename) if path else '%s/%s' % (object, filename)
            args = username, container, object
            nodeid = self.create_node(*args)
            # create node history
            vserials = self.create_history(headerid, nodeid, deleted)
            # set object tags
            self.create_tags(headerid, nodeid, vserials)
            # set object's publicity
            if public:
                self.backend.permissions.public_set(object)
            # set object's permissions
            self.create_permissions(headerid, object, username)

    def build_path(self, child_id):
        """Recursively materialize the folder chain of *child_id* as
        directory nodes/versions.

        Returns the path relative to the container ('' for a root folder).
        Raises Exception when an expected node is missing or cannot be
        created.
        """
        folder = Table('folder', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        j = folder.join(user, folder.c.owner_id == user.c.id)
        s = select([folder, user.c.username], from_obj=j)
        s = s.where(folder.c.id == child_id)
        # order_by() returns a new select; the original discarded it.
        s = s.order_by(folder.c.modificationdate)
        rp = self.conn.execute(s)
        t = rp.fetchone()
        # Directories are stored as empty objects: md5 of no data, size 0.
        hash = hashlib.md5().hexdigest().lower()
        size = 0
        if not t[PARENT_ID]:
            return ''
        else:
            container_path = t[USER]
            container_path += '/trash' if t[DELETED] else '/pithos'
            parent_node = self.backend.node.node_lookup(container_path)
            if not parent_node:
                raise Exception('Missing node:', container_path)
            parent_path = self.build_path(t[PARENT_ID])
            path = '%s/%s/%s' % (container_path, parent_path, t[NAME]) if parent_path else '%s/%s' % (container_path, t[NAME])
            node = self.backend.node.node_lookup(path)
            if not node:
                node = self.backend.node.node_create(parent_node, path)
                if not node:
                    raise Exception('Unable to create node:', path)

                # create versions: one for creation, and if the folder was
                # later modified, recluster it and add the modification one
                v = self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[CREATIONDATE])
                if t[CREATIONDATE] != t[MODIFICATIONDATE]:
                    self.backend.node.version_recluster(v, CLUSTER_HISTORY)
                    self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[MODIFICATIONDATE])

                # set permissions
                self.create_permissions(t[ID], path, t[USER])
            return '%s/%s' % (parent_path, t[NAME]) if parent_path else t[NAME]

    def retrieve_current_nodes(self):
        """Yield one row per current file: (username, header id, folder id,
        name, deleted flag, stored file path, mimetype, readforall,
        owner id)."""
        fileheader = Table('fileheader', self.metadata, autoload=True)
        filebody = Table('filebody', self.metadata, autoload=True)
        folder = Table('folder', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
        s = select([gss_user.c.username, fileheader.c.id, fileheader.c.folder_id,
                    fileheader.c.name, fileheader.c.deleted,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    fileheader.c.readforall, fileheader.c.owner_id], from_obj=j)

        rp = self.conn.execute(s)
        object = rp.fetchone()
        while object:
            yield object
            object = rp.fetchone()
        rp.close()

    def retrieve_node_versions(self, header_id):
        """Yield (row, total row count) for every filebody version of
        *header_id*, oldest first."""
        filebody = Table('filebody', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
        s = select([filebody.c.filesize, gss_user.c.username,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    filebody.c.modificationdate], from_obj=j)
        s = s.where(filebody.c.header_id == header_id)
        s = s.order_by(filebody.c.version)
        rp = self.conn.execute(s)
        version = rp.fetchone()
        while version:
            yield version, rp.rowcount
            version = rp.fetchone()
        rp.close()

    def retrieve_tags(self, header_id):
        """Return the old file tags of *header_id* as a comma-joined
        string ('' when there are none)."""
        filetag = Table('filetag', self.metadata, autoload=True)
        s = select([filetag.c.tag], filetag.c.fileid == header_id)
        rp = self.conn.execute(s)
        tags = rp.fetchall() if rp.returns_rows else []
        tags = [elem[0] for elem in tags]
        rp.close()
        return ','.join(tags) if tags else ''

    def retrieve_permissions(self, id, is_folder=True):
        """Collect the old read/write permissions of a folder (default)
        or fileheader.

        Returns a defaultdict(list) with 'read' and 'write' lists that
        mix group names and usernames.
        """
        if is_folder:
            ftable = Table('folder_permission', self.metadata, autoload=True)
        else:
            ftable = Table('fileheader_permission', self.metadata, autoload=True)
        permission = Table('permission', self.metadata, autoload=True)
        group = Table('gss_group', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        j = ftable.join(permission, ftable.c.permissions_id == permission.c.id)
        j1 = j.join(group, group.c.id == permission.c.group_id)
        j2 = j.join(user, user.c.id == permission.c.user_id)

        permissions = defaultdict(list)

        def subject_names(columns, from_obj, flag_column, subject_column):
            # One query per (read|write) x (group|user) combination.
            # Each result proxy is closed (the original leaked 3 of 4).
            s = select(columns, from_obj=from_obj)
            if is_folder:
                s = s.where(ftable.c.folder_id == id)
            else:
                s = s.where(ftable.c.fileheader_id == id)
            s = s.where(flag_column == True)
            s = s.where(subject_column != None)
            rp = self.conn.execute(s)
            names = [e[0] for e in rp.fetchall()]
            rp.close()
            return names

        # read: groups then users
        permissions['read'].extend(
            subject_names([group.c.name], j1, permission.c.read, permission.c.group_id))
        permissions['read'].extend(
            subject_names([user.c.username], j2, permission.c.read, permission.c.user_id))
        # write: groups then users
        permissions['write'].extend(
            subject_names([group.c.name], j1, permission.c.write, permission.c.group_id))
        permissions['write'].extend(
            subject_names([user.c.username], j2, permission.c.write, permission.c.user_id))

        return permissions
304 |
|
305 |
if __name__ == "__main__":
    # Connection strings for the source (old GSS) and destination
    # (Pithos) databases -- fill in before running.
    old_db = ''
    db = ''

    # The migration appends mtime-fixup SQL statements to this file.
    # `with` guarantees the file is closed even if the migration raises
    # (the original leaked it on error).  The name `f` stays at module
    # level for compatibility with code that references it globally.
    with open('fixdates.sql', 'w') as f:
        ot = ObjectMigration(old_db, db, f)
        ot.create_objects()
313 |
|
314 |
|