root / tools / migrate-db @ f6c0005f
History | View | Annotate | Download (8 kB)
1 |
#!/usr/bin/env python |
---|---|
2 |
|
3 |
# Copyright 2011 GRNET S.A. All rights reserved. |
4 |
# |
5 |
# Redistribution and use in source and binary forms, with or |
6 |
# without modification, are permitted provided that the following |
7 |
# conditions are met: |
8 |
# |
9 |
# 1. Redistributions of source code must retain the above |
10 |
# copyright notice, this list of conditions and the following |
11 |
# disclaimer. |
12 |
# |
13 |
# 2. Redistributions in binary form must reproduce the above |
14 |
# copyright notice, this list of conditions and the following |
15 |
# disclaimer in the documentation and/or other materials |
16 |
# provided with the distribution. |
17 |
# |
18 |
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS |
19 |
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 |
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
21 |
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR |
22 |
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 |
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 |
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
25 |
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
26 |
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 |
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
28 |
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 |
# POSSIBILITY OF SUCH DAMAGE. |
30 |
# |
31 |
# The views and conclusions contained in the software and |
32 |
# documentation are those of the authors and should not be |
33 |
# interpreted as representing official policies, either expressed |
34 |
# or implied, of GRNET S.A. |
35 |
|
36 |
from sqlalchemy import Table |
37 |
from sqlalchemy.sql import select |
38 |
|
39 |
from binascii import hexlify |
40 |
|
41 |
from pithos.backends.lib.hashfiler import Blocker |
42 |
from pithos.backends.lib.sqlalchemy import Node |
43 |
from pithos.aai.models import PithosUser |
44 |
|
45 |
from django.conf import settings |
46 |
|
47 |
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED |
48 |
from pithos.backends.lib.sqlalchemy.node import Node |
49 |
|
50 |
from lib.transfer import upload |
51 |
from lib.hashmap import HashMap, file_read_iterator |
52 |
from lib.client import Fault |
53 |
from lib.migrate import Migration, Cache |
54 |
from calendar import timegm |
55 |
|
56 |
import json |
57 |
import os |
58 |
import sys |
59 |
import hashlib |
60 |
import mimetypes |
61 |
|
62 |
class ObjectMigration(Migration):
    """Migrate file objects from the legacy GSS database into Pithos.

    Iterates over the current file nodes of the old database and recreates
    each object through the Pithos backend: container, directory markers,
    node, full version history and (eventually) metadata.
    """

    def __init__(self, old_db, db, f):
        """Initialize the migration.

        old_db -- connection string of the legacy database (handed to
                  Migration).
        db     -- connection string of the hash-cache database.
        f      -- writable file object receiving SQL statements that fix
                  version mtimes (see create_history).
        """
        Migration.__init__(self, old_db)
        self.cache = Cache(db)
        # Bug fix: the original ignored this argument and relied on a
        # module-level global named `f` inside create_history.
        self.f = f

    def create_node(self, username, container, object, filepath, mimetype):
        """Create the node for *object*, creating a directory marker for
        every intermediate path component, and return the new node id."""
        obj = ''
        path = '/'.join(object.split('/')[:-1])
        # Create directory markers for each ancestor directory.
        for component in path.split('/'):
            obj = '%s/%s' % (obj, component) if obj else component
            try:
                md5 = hashlib.md5()
                meta = {'Content-Type': 'application/directory',
                        'hash': md5.hexdigest().lower()}
                self.backend.update_object_hashmap(
                    username, username, container, obj, 0, [], meta)
            except NameError:
                # Best effort: marker creation failures are ignored,
                # matching the original behavior.
                pass

        parent_path = '%s/%s' % (username, container)
        parent_node = self.backend.node.node_lookup(parent_path)
        path = '%s/%s' % (parent_path, object)
        nodeid = self.backend.node.node_create(parent_node, path)
        return nodeid

    def create_history(self, header_id, node_id, deleted=False):
        """Recreate the version history of legacy file *header_id* under
        node *node_id*.

        All but the latest version land in CLUSTER_HISTORY and the latest
        in CLUSTER_NORMAL, unless *deleted*, in which case every version
        goes to CLUSTER_DELETED.  For each created version an UPDATE
        statement restoring the original mtime is appended to self.f.

        Raises Exception if a stored file path has no cached hash.
        """
        i = 0
        for t, rowcount in self.retrieve_node_versions(header_id):
            size, modified_by, filepath, mimetype, modificationdate = t
            cluster = CLUSTER_HISTORY if i < rowcount - 1 else CLUSTER_NORMAL
            cluster = cluster if not deleted else CLUSTER_DELETED
            hash_value = self.cache.get(filepath)
            if hash_value is None:
                raise Exception("Missing hash")
            args = (node_id, hash_value, size, None, modified_by, cluster)
            serial = self.backend.node.version_create(*args)[0]
            meta = {'hash': hash_value,
                    'content-type': mimetype}
            self.backend.node.attribute_set(
                serial, ((k, v) for k, v in meta.items()))
            # version_create stamps the current time; emit SQL that
            # restores the legacy modification time afterwards.
            timestamp = timegm(modificationdate.timetuple())
            microseconds = modificationdate.time().microsecond
            self.f.write('update versions set mtime=\'%10d.%6d\' where serial=%s;'
                         % (timestamp, microseconds, serial))
            i += 1

    def create_metadata(self, header_id, node_id):
        """Placeholder: iterate the legacy tags of *header_id*.

        TODO: actually store the tags on *node_id*; currently a no-op.
        """
        for t in self.retrieve_metadata(header_id):
            pass

    def create_objects(self):
        """Drive the migration: for every current legacy node create the
        container (if needed), the node and its version history."""
        for (username, headerid, folderid, filename, deleted,
             filepath, mimetype) in self.retrieve_current_nodes():
            path = self.retrieve_path(folderid)[1:]
            # Deleted files go to the trash container.
            container = 'pithos' if not deleted else 'trash'

            # Create the container if it does not exist.
            try:
                self.backend._lookup_container(username, container)
            except NameError:
                self.backend.put_container(username, username, container)

            # Create the node itself.
            object = '%s/%s' % (path, filename)
            nodeid = self.create_node(username, container, object,
                                      filepath, mimetype)

            # Create the node's version history and metadata.
            self.create_history(headerid, nodeid, deleted)
            self.create_metadata(headerid, nodeid)
            #self.set_public()
            #self.statistics()
            #self.set_permissions()

    def retrieve_path(self, child_id):
        """Return the '/'-joined legacy folder path of *child_id*
        (recursing up to the root, which contributes an empty prefix)."""
        folderTable = Table('folder', self.metadata, autoload=True)
        s = select([folderTable.c.parent_id, folderTable.c.name])
        s = s.where(folderTable.c.id == child_id)
        rp = self.conn.execute(s)
        parent_id, foldername = rp.fetchone()
        if not parent_id:
            return ''
        return '%s/%s' % (self.retrieve_path(parent_id), foldername)

    def retrieve_current_nodes(self):
        """Yield (username, header id, folder id, name, deleted flag,
        stored file path, mimetype) for each current legacy file."""
        fileheader = Table('fileheader', self.metadata, autoload=True)
        filebody = Table('filebody', self.metadata, autoload=True)
        folder = Table('folder', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(fileheader,
                          filebody.c.id == fileheader.c.currentbody_id)
        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
        s = select([gss_user.c.username, fileheader.c.id,
                    fileheader.c.folder_id, fileheader.c.name,
                    fileheader.c.deleted, filebody.c.storedfilepath,
                    filebody.c.mimetype], from_obj=j)
        # NOTE(review): limits the migration to a single node — looks like
        # a debugging leftover; confirm before running a full migration.
        s = s.limit(1)
        rp = self.conn.execute(s)
        object = rp.fetchone()
        while object:
            yield object
            object = rp.fetchone()
        rp.close()

    def retrieve_node_versions(self, header_id):
        """Yield ((size, username, path, mimetype, mtime), rowcount) for
        every version of legacy file *header_id*, oldest first."""
        filebody = Table('filebody', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(gss_user,
                          filebody.c.modifiedby_id == gss_user.c.id)
        s = select([filebody.c.filesize, gss_user.c.username,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    filebody.c.modificationdate], from_obj=j)
        s = s.where(filebody.c.header_id == header_id)
        s = s.order_by(filebody.c.version)
        rp = self.conn.execute(s)
        version = rp.fetchone()
        while version:
            yield version, rp.rowcount
            version = rp.fetchone()
        rp.close()

    def retrieve_metadata(self, header_id):
        """Yield the filetag rows of legacy file *header_id*."""
        filetag = Table('filetag', self.metadata, autoload=True)
        s = filetag.select(filetag.c.fileid == header_id)
        rp = self.conn.execute(s)
        tag = rp.fetchone()
        while tag:
            yield tag
            # Bug fix: the original read `tp.fetchone()` — an undefined
            # name that raised NameError after the first tag.
            tag = rp.fetchone()
        rp.close()

    def handle_deleted(self):
        pass
193 |
|
194 |
if __name__ == "__main__":
    # TODO: fill in the connection strings for the legacy and cache DBs.
    old_db = ''
    db = ''

    # Context manager guarantees the SQL fix-up file is flushed and
    # closed even if the migration raises (the original leaked it).
    with open('fixdates.sql', 'w') as f:
        ot = ObjectMigration(old_db, db, f)
        ot.create_objects()
202 |
|
203 |
|