root / contrib / migrate-db @ 0fa71fcc
History | View | Annotate | Download (13.4 kB)
1 |
#!/usr/bin/env python |
---|---|
2 |
|
3 |
# Copyright 2011-2012 GRNET S.A. All rights reserved. |
4 |
# |
5 |
# Redistribution and use in source and binary forms, with or |
6 |
# without modification, are permitted provided that the following |
7 |
# conditions are met: |
8 |
# |
9 |
# 1. Redistributions of source code must retain the above |
10 |
# copyright notice, this list of conditions and the following |
11 |
# disclaimer. |
12 |
# |
13 |
# 2. Redistributions in binary form must reproduce the above |
14 |
# copyright notice, this list of conditions and the following |
15 |
# disclaimer in the documentation and/or other materials |
16 |
# provided with the distribution. |
17 |
# |
18 |
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS |
19 |
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 |
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
21 |
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR |
22 |
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 |
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 |
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
25 |
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
26 |
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 |
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
28 |
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 |
# POSSIBILITY OF SUCH DAMAGE. |
30 |
# |
31 |
# The views and conclusions contained in the software and |
32 |
# documentation are those of the authors and should not be |
33 |
# interpreted as representing official policies, either expressed |
34 |
# or implied, of GRNET S.A. |
35 |
|
36 |
from sqlalchemy import Table |
37 |
from sqlalchemy.sql import select, and_ |
38 |
|
39 |
from binascii import hexlify |
40 |
|
41 |
from pithos.backends.lib.hashfiler import Blocker |
42 |
from pithos.backends.lib.sqlalchemy import Node |
43 |
|
44 |
from django.conf import settings |
45 |
|
46 |
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED |
47 |
from pithos.backends.lib.sqlalchemy.node import Node, ROOTNODE |
48 |
|
49 |
from pithos.tools.lib.transfer import upload |
50 |
from pithos.tools.lib.hashmap import HashMap |
51 |
from pithos.tools.lib.client import Fault |
52 |
|
53 |
from migrate import Migration, Cache |
54 |
|
55 |
from calendar import timegm |
56 |
from decimal import Decimal |
57 |
from collections import defaultdict |
58 |
|
59 |
import json |
60 |
import os |
61 |
import sys |
62 |
import hashlib |
63 |
import mimetypes |
64 |
import time |
65 |
import datetime |
66 |
|
67 |
# Column indices into the rows returned by the folder/gss_user join
# (see ObjectMigration.build_path) -- one constant per selected column.
ID = 0
CREATIONDATE = 1
MODIFICATIONDATE = 2
DELETED = 3
ICON = 4
NAME = 5
VERSION = 6
CREATEDBY_ID = 7
MODIFIEDBY_ID = 8
OWNER_ID = 9
PARENT_ID = 10
READFORALL = 11
SHARED = 12
USER = 13
68 |
|
69 |
class ObjectMigration(Migration):
    """Migrate file and folder objects from the legacy GSS database to Pithos.

    Reads current nodes from the old schema and recreates nodes, version
    history, tags, publicity and permissions through the Pithos backend.
    """

    def __init__(self, old_db, db, f):
        """Initialize the migration.

        old_db -- connection string of the legacy (GSS) database
        db     -- connection string of the hash-cache database
        f      -- writable file object receiving the generated fixdates SQL
        """
        Migration.__init__(self, old_db)
        self.cache = Cache(db)
        # Keep a reference to the SQL output file.  The original dropped
        # this parameter on the floor and create_version relied on a
        # module-level global `f` instead; storing it here is backward
        # compatible and makes the dependency explicit.
        self.f = f
73 |
|
74 |
def create_node(self, username, container, object): |
75 |
node = self.backend.node.node_lookup(object) |
76 |
if not node: |
77 |
parent_path = '%s/%s' %(username, container) |
78 |
parent_node = self.backend.node.node_lookup(parent_path) |
79 |
if not parent_node: |
80 |
raise Exception('Missing node') |
81 |
node = self.backend.node.node_create(parent_node, object) |
82 |
return node |
83 |
|
84 |
def create_history(self, header_id, node_id, deleted=False): |
85 |
i = 0 |
86 |
map = HashMap(self.backend.block_size, self.backend.hash_algorithm) |
87 |
v = [] |
88 |
stored_versions = self.backend.node.node_get_versions(node_id, ['mtime']) |
89 |
stored_versions_mtime = [datetime.datetime.utcfromtimestamp(elem[0]) for elem in stored_versions] |
90 |
for t, rowcount in self.retrieve_node_versions(header_id): |
91 |
size, modyfied_by, filepath, mimetype, mdate = t |
92 |
if mdate in stored_versions_mtime: |
93 |
continue |
94 |
cluster = CLUSTER_HISTORY if i < rowcount - 1 else CLUSTER_NORMAL |
95 |
cluster = cluster if not deleted else CLUSTER_DELETED |
96 |
hash = self.cache.get(filepath) |
97 |
if hash == None: |
98 |
raise Exception("Missing hash") |
99 |
args = node_id, hash, size, modyfied_by, cluster, mimetype, mdate |
100 |
v.append(self.create_version(*args)) |
101 |
i += 1 |
102 |
return v |
103 |
|
104 |
def create_version(self, node_id, hash, size, modyfied_by, cluster, mimetype, mdate): |
105 |
args = (node_id, hash, size, None, modyfied_by, cluster) |
106 |
serial = self.backend.node.version_create(*args)[0] |
107 |
meta = {'hash':hash, |
108 |
'content-type':mimetype} |
109 |
self.backend.node.attribute_set(serial, ((k, v) for k, v in meta.iteritems())) |
110 |
timestamp = timegm(mdate.timetuple()) |
111 |
microseconds = mdate.time().microsecond |
112 |
values = timestamp, microseconds, serial |
113 |
f.write('update versions set mtime=\'%10d.%6d\' where serial=%s;' %values) |
114 |
return serial |
115 |
|
116 |
def create_tags(self, header_id, node_id, vserials): |
117 |
tags = self.retrieve_tags(header_id) |
118 |
if not tags: |
119 |
return |
120 |
for v in vserials: |
121 |
self.backend.node.attribute_set(v, (('X-Object-Meta-Tag', tags),)) |
122 |
|
123 |
def create_permissions(self, fid, path, owner, is_folder=True): |
124 |
fpath, fpermissions = self.backend.permissions.access_inherit(path) |
125 |
permissions = self.retrieve_permissions(fid, is_folder) |
126 |
if not fpermissions: |
127 |
keys = ('read', 'write') |
128 |
for k in keys: |
129 |
if owner in permissions[k]: |
130 |
permissions[k].remove(owner) |
131 |
self.backend.permissions.access_set(path, permissions) |
132 |
else: |
133 |
keys = ('read', 'write') |
134 |
common_p = {} |
135 |
for k in keys: |
136 |
if owner in permissions[k]: |
137 |
permissions[k].remove(owner) |
138 |
common = set(fpermissions[k]).intersection(set(permissions[k])) |
139 |
common_p[k] = list(common) |
140 |
#keep only the common permissions |
141 |
#trade off for securing access only to explicitly authorized users |
142 |
self.backend.permissions.access_set(fpath, common_p) |
143 |
|
144 |
    def create_objects(self):
        """Migrate every current legacy file into Pithos.

        For each row from retrieve_current_nodes: ensure the user's
        'pithos' and 'trash' containers exist, build the folder path,
        create the node, its version history, tags, publicity and
        permissions.
        """
        for t in self.retrieve_current_nodes():
            username, headerid, folderid, filename, deleted, filepath, mimetype, public, owner_id = t
            containers = ['pithos', 'trash']

            for c in containers:
                #create container if it does not exist
                try:
                    self.backend._lookup_container(username, c)
                except NameError, e:
                    # NOTE(review): the backend apparently signals a missing
                    # container with NameError -- confirm against the
                    # backend's _lookup_container implementation.
                    self.backend.put_container(username, username, c)

            # deleted legacy files land in 'trash', live ones in 'pithos'
            container = 'pithos' if not deleted else 'trash'
            path = self.build_path(folderid)
            #create node
            object = '%s/%s' %(username, container)
            object = '%s/%s/%s' %(object, path, filename) if path else '%s/%s' %(object, filename)
            args = username, container, object
            nodeid = self.create_node(*args)
            #create node history
            vserials = self.create_history(headerid, nodeid, deleted)
            #set object tags
            self.create_tags(headerid, nodeid, vserials)
            #set object's publicity
            if public:
                self.backend.permissions.public_set(
                    object,
                    self.backend.public_url_security,
                    self.backend.public_url_alphabet
                )
            #set object's permissions
            self.create_permissions(headerid, object, username, is_folder=False)
176 |
|
177 |
def build_path(self, child_id): |
178 |
folder = Table('folder', self.metadata, autoload=True) |
179 |
user = Table('gss_user', self.metadata, autoload=True) |
180 |
j = folder.join(user, folder.c.owner_id == user.c.id) |
181 |
s = select([folder, user.c.username], from_obj=j) |
182 |
s = s.where(folder.c.id == child_id) |
183 |
s.order_by(folder.c.modificationdate) |
184 |
rp = self.conn.execute(s) |
185 |
t = rp.fetchone() |
186 |
md5 = hashlib.md5() |
187 |
hash = md5.hexdigest().lower() |
188 |
size = 0 |
189 |
if not t[PARENT_ID]: |
190 |
return '' |
191 |
else: |
192 |
container_path = t[USER] |
193 |
container_path += '/trash' if t[DELETED] else '/pithos' |
194 |
parent_node = self.backend.node.node_lookup(container_path) |
195 |
if not parent_node: |
196 |
raise Exception('Missing node:', container_path) |
197 |
parent_path = self.build_path(t[PARENT_ID]) |
198 |
path = '%s/%s/%s' %(container_path, parent_path, t[NAME]) if parent_path else '%s/%s' %(container_path, t[NAME]) |
199 |
node = self.backend.node.node_lookup(path) |
200 |
if not node: |
201 |
node = self.backend.node.node_create(parent_node, path) |
202 |
if not node: |
203 |
raise Exception('Unable to create node:', path) |
204 |
|
205 |
#create versions |
206 |
v = self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[CREATIONDATE]) |
207 |
if t[CREATIONDATE] != t[MODIFICATIONDATE]: |
208 |
self.backend.node.version_recluster(v, CLUSTER_HISTORY) |
209 |
self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[MODIFICATIONDATE]) |
210 |
|
211 |
#set permissions |
212 |
self.create_permissions(t[ID], path, t[USER], is_folder=True) |
213 |
return '%s/%s' %(parent_path, t[NAME]) if parent_path else t[NAME] |
214 |
|
215 |
def retrieve_current_nodes(self): |
216 |
fileheader = Table('fileheader', self.metadata, autoload=True) |
217 |
filebody = Table('filebody', self.metadata, autoload=True) |
218 |
folder = Table('folder', self.metadata, autoload=True) |
219 |
gss_user = Table('gss_user', self.metadata, autoload=True) |
220 |
j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id) |
221 |
j = j.join(folder, fileheader.c.folder_id == folder.c.id) |
222 |
j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id) |
223 |
s = select([gss_user.c.username, fileheader.c.id, fileheader.c.folder_id, |
224 |
fileheader.c.name, fileheader.c.deleted, |
225 |
filebody.c.storedfilepath, filebody.c.mimetype, |
226 |
fileheader.c.readforall, fileheader.c.owner_id], from_obj=j) |
227 |
rp = self.conn.execute(s) |
228 |
object = rp.fetchone() |
229 |
while object: |
230 |
yield object |
231 |
object = rp.fetchone() |
232 |
rp.close() |
233 |
|
234 |
def retrieve_node_versions(self, header_id): |
235 |
filebody = Table('filebody', self.metadata, autoload=True) |
236 |
gss_user = Table('gss_user', self.metadata, autoload=True) |
237 |
j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id) |
238 |
s = select([filebody.c.filesize, gss_user.c.username, |
239 |
filebody.c.storedfilepath, filebody.c.mimetype, |
240 |
filebody.c.modificationdate], from_obj=j) |
241 |
s = s.where(filebody.c.header_id == header_id) |
242 |
s = s.order_by(filebody.c.version) |
243 |
rp = self.conn.execute(s) |
244 |
version = rp.fetchone() |
245 |
while version: |
246 |
yield version, rp.rowcount |
247 |
version = rp.fetchone() |
248 |
rp.close() |
249 |
|
250 |
def retrieve_tags(self, header_id): |
251 |
filetag = Table('filetag', self.metadata, autoload=True) |
252 |
s = select([filetag.c.tag], filetag.c.fileid == header_id) |
253 |
rp = self.conn.execute(s) |
254 |
tags = rp.fetchall() if rp.returns_rows else [] |
255 |
tags = [elem[0] for elem in tags] |
256 |
rp.close() |
257 |
return ','.join(tags) if tags else '' |
258 |
|
259 |
def retrieve_permissions(self, id, is_folder=True): |
260 |
permissions = {} |
261 |
if is_folder: |
262 |
ftable = Table('folder_permission', self.metadata, autoload=True) |
263 |
else: |
264 |
ftable = Table('fileheader_permission', self.metadata, autoload=True) |
265 |
permission = Table('permission', self.metadata, autoload=True) |
266 |
group = Table('gss_group', self.metadata, autoload=True) |
267 |
user = Table('gss_user', self.metadata, autoload=True) |
268 |
j = ftable.join(permission, ftable.c.permissions_id == permission.c.id) |
269 |
j1 = j.join(group, group.c.id == permission.c.group_id) |
270 |
j2 = j.join(user, user.c.id == permission.c.user_id) |
271 |
|
272 |
permissions = defaultdict(list) |
273 |
|
274 |
def _get_permissions(self, action='read', get_groups=True): |
275 |
if get_groups: |
276 |
col, j = group.c.name, j1 |
277 |
cond2 = permission.c.group_id != None |
278 |
else: |
279 |
col, j = user.c.username, j2 |
280 |
cond2 = permission.c.user_id != None |
281 |
s = select([col], from_obj=j) |
282 |
if is_folder: |
283 |
s = s.where(ftable.c.folder_id == id) |
284 |
else: |
285 |
s = s.where(ftable.c.fileheader_id == id) |
286 |
if action == 'read': |
287 |
cond1 = permission.c.read == True |
288 |
else: |
289 |
cond1 = permission.c.write == True |
290 |
s = s.where(and_(cond1, cond2)) |
291 |
print '>', s, s.compile().params |
292 |
rp = self.conn.execute(s) |
293 |
p = permissions[action].extend([e[0] for e in rp.fetchall()]) |
294 |
rp.close() |
295 |
return p |
296 |
|
297 |
#get object read groups |
298 |
_get_permissions(self, action='read', get_groups=True) |
299 |
|
300 |
#get object read users |
301 |
_get_permissions(self, action='read', get_groups=False) |
302 |
|
303 |
#get object write groups |
304 |
_get_permissions(self, action='write', get_groups=True) |
305 |
|
306 |
#get object write groups |
307 |
_get_permissions(self, action='write', get_groups=False) |
308 |
|
309 |
return permissions |
310 |
|
311 |
if __name__ == "__main__":
    # TODO(review): connection strings were left empty in the original;
    # they must be filled in before running the migration.
    old_db = ''
    db = ''

    # `f` stays module-global: create_version writes its fixdates SQL
    # through it.  try/finally guarantees the file is flushed and closed
    # even when the migration aborts mid-way.
    f = open('fixdates.sql', 'w')
    try:
        ot = ObjectMigration(old_db, db, f)
        ot.create_objects()
    finally:
        f.close()
319 |
|
320 |
|