Statistics
| Branch: | Tag: | Revision:

root / tools / migrate-db @ f6c0005f

History | View | Annotate | Download (8 kB)

1
#!/usr/bin/env python
2

    
3
# Copyright 2011 GRNET S.A. All rights reserved.
4
# 
5
# Redistribution and use in source and binary forms, with or
6
# without modification, are permitted provided that the following
7
# conditions are met:
8
# 
9
#   1. Redistributions of source code must retain the above
10
#      copyright notice, this list of conditions and the following
11
#      disclaimer.
12
# 
13
#   2. Redistributions in binary form must reproduce the above
14
#      copyright notice, this list of conditions and the following
15
#      disclaimer in the documentation and/or other materials
16
#      provided with the distribution.
17
# 
18
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
19
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
22
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
25
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
# POSSIBILITY OF SUCH DAMAGE.
30
# 
31
# The views and conclusions contained in the software and
32
# documentation are those of the authors and should not be
33
# interpreted as representing official policies, either expressed
34
# or implied, of GRNET S.A.
35

    
36
from sqlalchemy import Table
37
from sqlalchemy.sql import select
38

    
39
from binascii import hexlify
40

    
41
from pithos.backends.lib.hashfiler import Blocker
42
from pithos.backends.lib.sqlalchemy import Node
43
from pithos.aai.models import PithosUser
44

    
45
from django.conf import settings
46

    
47
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
48
from pithos.backends.lib.sqlalchemy.node import Node
49

    
50
from lib.transfer import upload
51
from lib.hashmap import HashMap, file_read_iterator
52
from lib.client import Fault
53
from lib.migrate import Migration, Cache
54
from calendar import timegm
55

    
56
import json
57
import os
58
import sys
59
import hashlib
60
import mimetypes
61

    
62
class ObjectMigration(Migration):
    """Migrate file objects and their version history into the new backend.

    The old database is read through ``self.conn`` / ``self.metadata`` and
    the new store is written through ``self.backend``; these attributes are
    not set in this class and are expected to be provided by ``Migration``.

    For every migrated version an SQL ``update`` statement fixing the
    version's ``mtime`` is written to the file-like object given as ``f``.
    """

    def __init__(self, old_db, db, f):
        """Set up the migration.

        old_db -- passed unchanged to ``Migration.__init__``
        db     -- passed unchanged to ``Cache`` (used to look up hashes)
        f      -- writable file-like object receiving the SQL statements
        """
        Migration.__init__(self, old_db)
        self.cache = Cache(db)
        # Keep the output file on the instance so create_history() does not
        # depend on a module-level global named ``f``.
        self.f = f

    def create_node(self, username, container, object, filepath, mimetype):
        """Create directory markers for ``object`` and then its node.

        A zero-size 'application/directory' object is written for every
        ancestor directory of ``object`` ('a', 'a/b', ... for 'a/b/file'),
        then a node is created under '<username>/<container>'.

        ``filepath`` and ``mimetype`` are accepted for interface
        compatibility but are not used here.

        Returns the value returned by ``self.backend.node.node_create``.
        """
        # Every marker carries the MD5 digest of empty content.
        empty_md5 = hashlib.md5().hexdigest().lower()

        marker = ''
        for component in object.split('/')[:-1]:
            # Skip empty components (leading or doubled slashes) so that no
            # directory marker with an empty name is created.
            if not component:
                continue
            marker = '%s/%s' % (marker, component) if marker else component
            meta = {'Content-Type': 'application/directory',
                    'hash': empty_md5}
            try:
                self.backend.update_object_hashmap(username, username,
                                                   container, marker, 0, [],
                                                   meta)
            except NameError:
                # Best effort: a NameError raised by the backend is ignored.
                # NOTE(review): presumably signals a missing container or
                # object -- confirm against the backend.
                pass

        parent_path = '%s/%s' % (username, container)
        parent_node = self.backend.node.node_lookup(parent_path)
        node_path = '%s/%s' % (parent_path, object)
        return self.backend.node.node_create(parent_node, node_path)

    def create_history(self, header_id, node_id, deleted=False):
        """Create one backend version per stored version of a file.

        header_id -- id of the file header in the old database
        node_id   -- node the versions are attached to
        deleted   -- when true, every version goes to CLUSTER_DELETED;
                     otherwise the last version goes to CLUSTER_NORMAL and
                     all earlier ones to CLUSTER_HISTORY

        Raises Exception("Missing hash") when the cache has no hash for a
        version's stored file path.
        """
        versions = self.retrieve_node_versions(header_id)
        for index, (row, rowcount) in enumerate(versions):
            size, modified_by, filepath, mimetype, modificationdate = row

            if deleted:
                cluster = CLUSTER_DELETED
            elif index < rowcount - 1:
                cluster = CLUSTER_HISTORY
            else:
                cluster = CLUSTER_NORMAL

            content_hash = self.cache.get(filepath)
            if content_hash is None:
                raise Exception("Missing hash")

            serial = self.backend.node.version_create(
                node_id, content_hash, size, None, modified_by, cluster)[0]
            meta = {'hash': content_hash,
                    'content-type': mimetype}
            self.backend.node.attribute_set(
                serial, ((k, v) for k, v in meta.items()))

            timestamp = timegm(modificationdate.timetuple())
            microseconds = modificationdate.microsecond
            # Microseconds are zero-padded: space padding would produce an
            # invalid number such as '1300000000.     5'.
            self.f.write('update versions set mtime=\'%10d.%06d\' where serial=%s;'
                         % (timestamp, microseconds, serial))

    def create_metadata(self, header_id, node_id):
        """Placeholder: iterates the file's tags without using them yet."""
        for t in self.retrieve_metadata(header_id):
            pass

    def create_objects(self):
        """Migrate every row yielded by ``retrieve_current_nodes``.

        Deleted files go to the 'trash' container, all others to 'pithos'.
        The container is created when looking it up raises NameError.
        """
        for (username, headerid, folderid, filename, deleted, filepath,
             mimetype) in self.retrieve_current_nodes():
            # retrieve_path() returns '' or '/a/b'; drop the leading slash.
            path = self.retrieve_path(folderid)[1:]
            container = 'trash' if deleted else 'pithos'

            # Create the container if it does not exist.
            try:
                self.backend._lookup_container(username, container)
            except NameError:
                self.backend.put_container(username, username, container)

            # Files directly in the root folder have an empty path; do not
            # prefix their name with '/', which would yield a '//' node path.
            object_name = '%s/%s' % (path, filename) if path else filename
            nodeid = self.create_node(username, container, object_name,
                                      filepath, mimetype)

            self.create_history(headerid, nodeid, deleted)
            self.create_metadata(headerid, nodeid)
            # TODO: set_public(), statistics() and set_permissions() are not
            # implemented yet.

    def retrieve_path(self, child_id):
        """Return the path of folder ``child_id`` below its root folder.

        The result is '' for a folder without parent, otherwise the folder
        names joined as '/a/b' (leading slash, root folder name omitted).

        Raises Exception when no folder with that id exists.
        """
        folderTable = Table('folder', self.metadata, autoload=True)
        s = select([folderTable.c.parent_id, folderTable.c.name])
        s = s.where(folderTable.c.id == child_id)
        rp = self.conn.execute(s)
        try:
            row = rp.fetchone()
        finally:
            rp.close()
        if row is None:
            raise Exception('Folder %s not found' % child_id)
        parent_id, foldername = row
        if not parent_id:
            return ''
        return '%s/%s' % (self.retrieve_path(parent_id), foldername)

    def retrieve_current_nodes(self):
        """Yield one row per file header joined with its current body.

        Each row holds: username, header id, folder id, file name, deleted
        flag, stored file path and mimetype.
        """
        fileheader = Table('fileheader', self.metadata, autoload=True)
        filebody = Table('filebody', self.metadata, autoload=True)
        folder = Table('folder', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(fileheader,
                          filebody.c.id == fileheader.c.currentbody_id)
        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
        s = select([gss_user.c.username, fileheader.c.id,
                    fileheader.c.folder_id, fileheader.c.name,
                    fileheader.c.deleted, filebody.c.storedfilepath,
                    filebody.c.mimetype], from_obj=j)
        # NOTE(review): this limits the whole migration to a single file;
        # it looks like a debugging leftover -- confirm before a real run.
        s = s.limit(1)
        rp = self.conn.execute(s)
        try:
            row = rp.fetchone()
            while row:
                yield row
                row = rp.fetchone()
        finally:
            # Runs even if the consumer stops early or raises.
            rp.close()

    def retrieve_node_versions(self, header_id):
        """Yield ``(row, total)`` for each body of a file, ordered by version.

        ``row`` holds: file size, username of the modifier, stored file
        path, mimetype and modification date. ``total`` is the number of
        versions of the file.
        """
        filebody = Table('filebody', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
        s = select([filebody.c.filesize, gss_user.c.username,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    filebody.c.modificationdate], from_obj=j)
        s = s.where(filebody.c.header_id == header_id)
        s = s.order_by(filebody.c.version)
        rp = self.conn.execute(s)
        try:
            rows = rp.fetchall()
        finally:
            rp.close()
        # Count the fetched rows instead of relying on rp.rowcount, which
        # is not guaranteed to be meaningful for SELECT statements.
        total = len(rows)
        for row in rows:
            yield row, total

    def retrieve_metadata(self, header_id):
        """Yield every 'filetag' row whose ``fileid`` equals ``header_id``."""
        filetag = Table('filetag', self.metadata, autoload=True)
        s = filetag.select(filetag.c.fileid == header_id)
        rp = self.conn.execute(s)
        try:
            tag = rp.fetchone()
            while tag:
                yield tag
                tag = rp.fetchone()
        finally:
            rp.close()

    def handle_deleted(self):
        """Placeholder; not implemented."""
        pass
193

    
194
if __name__ == "__main__":
    # NOTE(review): both values are empty placeholders; presumably they must
    # be set to the old and new database locations before running.
    old_db = ''
    db = ''

    # The context manager closes the SQL output file even when the
    # migration raises part-way through.
    with open('fixdates.sql', 'w') as f:
        ot = ObjectMigration(old_db, db, f)
        ot.create_objects()
202
    
203