Migration tool Progress I
[pithos] / tools / migration
1 #!/usr/bin/env python
2
3 # Copyright 2011 GRNET S.A. All rights reserved.
4
5 # Redistribution and use in source and binary forms, with or
6 # without modification, are permitted provided that the following
7 # conditions are met:
8
9 #   1. Redistributions of source code must retain the above
10 #      copyright notice, this list of conditions and the following
11 #      disclaimer.
12
13 #   2. Redistributions in binary form must reproduce the above
14 #      copyright notice, this list of conditions and the following
15 #      disclaimer in the documentation and/or other materials
16 #      provided with the distribution.
17
18 # THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
19 # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
25 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 # POSSIBILITY OF SUCH DAMAGE.
30
31 # The views and conclusions contained in the software and
32 # documentation are those of the authors and should not be
33 # interpreted as representing official policies, either expressed
34 # or implied, of GRNET S.A.
35
36 from sqlalchemy import create_engine
37 from sqlalchemy import Table, MetaData
38 from sqlalchemy.sql import select
39
40 from pithos.api.util import hashmap_hash, get_container_headers
41 from pithos.backends.lib.hashfiler import Blocker, Mapper
42 from pithos.aai.models import PithosUser
43
44 from django.conf import settings
45
46 from pithos.backends.modular import ModularBackend
47
48 import json
49 import base64
50 import os
51
class Migration(object):
    """Common scaffolding for a migration step.

    Opens a SQLAlchemy connection to the source (GSS) database and
    exposes ``engine``, ``metadata`` and ``conn`` to subclasses.
    """
    def __init__(self, db):
        """Connect to the database described by the URL *db*."""
        engine = create_engine(db)
        self.engine = engine
        self.metadata = MetaData(engine)
        #self.engine.echo = True  # debug toggle: log emitted SQL
        self.conn = engine.connect()

    def execute(self):
        """Run the migration step; subclasses override this."""
        pass
62 class UserMigration(Migration):
63     def __init__(self, db):
64         Migration.__init__(self, db)
65         self.gss_users = Table('gss_user', self.metadata, autoload=True)
66     
67     def execute(self):
68         s = self.gss_users.select()
69         users = self.conn.execute(s).fetchall()
70         l = []
71         for u in users:
72             user = PithosUser()
73             user.pk = u['id']
74             user.uniq = u['username']
75             user.realname = u['name']
76             user.is_admin = False
77             user.affiliation = u['homeorganization'] if u['homeorganization'] else ''
78             user.auth_token = base64.b64encode(u['authtoken'])
79             user.auth_token_created = u['creationdate']
80             user.auth_token_expires = u['authtokenexpirydate']
81             user.created = u['creationdate']
82             user.updated = u['modificationdate']
83             print '#', user
84             user.save(update_timestamps=False)
85     
86 class DataMigration(Migration):
87     def __init__(self, db, path, block_size, hash_algorithm):
88         Migration.__init__(self, db)
89         params = {'blocksize': block_size,
90                   'blockpath': os.path.join(path + '/blocks'),
91                   'hashtype': hash_algorithm}
92         self.blocker = Blocker(**params)
93         
94         params = {'mappath': os.path.join(path + '/maps'),
95                   'namelen': self.blocker.hashlen}
96         self.mapper = Mapper(**params)
97     
98     def execute(self):
99         filebody = Table('filebody', self.metadata, autoload=True)
100         s = select([filebody.c.id, filebody.c.storedfilepath])
101         rp = self.conn.execute(s)
102         
103         #basepath = '/Users/butters/Downloads'
104         #files = os.listdir(basepath)
105         #files = [f for f in files if not os.path.isdir(os.path.join(basepath, f))]
106         #i = 0
107         
108         while True:
109             t = rp.fetchone()
110             if not t:
111                 break
112             name, path = t
113             
114             #id, path = t[0], os.path.join(basepath, files[i])
115             #i+=1
116             #if i == len(files):
117             #    break
118             
119             print id, path
120             hashlist = self.blocker.block_stor_file(open(path))[1]
121             self.mapper.map_stor(id, hashlist)
122         rp.close()
123
124 class ObjectMigration(DataMigration):
125     def __init__(self, db, path, block_size, hash_algorithm):
126         DataMigration.__init__(self, db, path, block_size, hash_algorithm)
127         options = getattr(settings, 'BACKEND', None)[1]
128         self.backend = ModularBackend(*options)
129     
130     def create_default_containers(self):
131         users = PithosUser.objects.all()
132         for u in users:
133             try:
134                 self.backend.put_container(u.uniq, u.uniq, 'pithos', {})
135                 self.backend.put_container(u.uniq, u.uniq, 'trash', {})
136             except NameError, e:
137                 pass
138     
139     def create_directory_markers(self, parent_id=None, path=None):
140         folderTable = Table('folder', self.metadata, autoload=True)
141         userTable = Table('gss_user', self.metadata, autoload=True)
142         s = select([folderTable.c.id, folderTable.c.name, userTable.c.username])
143         s = s.where(folderTable.c.parent_id == parent_id)
144         s = s.where(folderTable.c.owner_id == userTable.c.id)
145         rp = self.conn.execute(s)
146         while True:
147             t = rp.fetchone()
148             if not t:
149                 path = None
150                 break
151             id, name, uuniq = t[0], t[1], t[2]
152             #print id, name, uuniq
153             if parent_id:
154                 obj = '%s/%s' %(path, name) if path else name
155                 print '#', obj
156                 self.backend.update_object_hashmap(uuniq, uuniq, 'pithos', obj, 0, [])
157             else:
158                 obj = ''
159             self.create_directory_markers(id, path=obj)
160         rp.close()
161         path = None
162     
163     def execute(self):
164         filebody = Table('filebody', self.metadata, autoload=True)
165         s = select([filebody.c.id])
166         rp = self.conn.execute(s)
167         while True:
168             id = rp.fetchone()
169             if not id:
170                 break
171             meta = {}
172             hashlist = self.mapper.map_retr(id)
173             #hashmap = d['hashes']
174             #size = int(d['bytes'])
175             #meta.update({'hash': hashmap_hash(request, hashmap)})
176             #version_id = backend.update_object_hashmap(request.user, v_account,
177             #                                           v_container, v_object,
178             #                                           size, hashmap)
179         rp.close()
180     
if __name__ == "__main__":
    db = ''
    # SECURITY: the original kept a live connection URL with embedded
    # credentials in this comment; supply the URL via configuration instead.
    #db = 'postgresql://gss:archipelagos@62.217.112.56/pithos'

    t = UserMigration(db)
    t.execute()

    # The original chained an unused ``options`` name into this assignment.
    basepath = getattr(settings, 'PROJECT_PATH', None)
    params = {'db': db,
              'path': os.path.join(basepath, 'data/pithos/'),
              'block_size': (4 * 1024 * 1024),
              'hash_algorithm': 'sha256'}
    dt = DataMigration(**params)
    dt.execute()

    ot = ObjectMigration(**params)
    ot.create_default_containers()
    ot.create_directory_markers()
198