Statistics
| Branch: | Tag: | Revision:

root / other / migrate-db @ 2e662088

History | View | Annotate | Download (13.2 kB)

1
#!/usr/bin/env python
2

    
3
# Copyright 2011-2012 GRNET S.A. All rights reserved.
4
# 
5
# Redistribution and use in source and binary forms, with or
6
# without modification, are permitted provided that the following
7
# conditions are met:
8
# 
9
#   1. Redistributions of source code must retain the above
10
#      copyright notice, this list of conditions and the following
11
#      disclaimer.
12
# 
13
#   2. Redistributions in binary form must reproduce the above
14
#      copyright notice, this list of conditions and the following
15
#      disclaimer in the documentation and/or other materials
16
#      provided with the distribution.
17
# 
18
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
19
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
22
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
25
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
# POSSIBILITY OF SUCH DAMAGE.
30
# 
31
# The views and conclusions contained in the software and
32
# documentation are those of the authors and should not be
33
# interpreted as representing official policies, either expressed
34
# or implied, of GRNET S.A.
35

    
36
from sqlalchemy import Table
37
from sqlalchemy.sql import select, and_
38

    
39
from binascii import hexlify
40

    
41
from pithos.backends.lib.hashfiler import Blocker
42
from pithos.backends.lib.sqlalchemy import Node
43

    
44
from django.conf import settings
45

    
46
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
47
from pithos.backends.lib.sqlalchemy.node import Node, ROOTNODE
48

    
49
from pithos.lib.transfer import upload
50
from pithos.lib.hashmap import HashMap
51
from pithos.lib.client import Fault
52

    
53
from migrate import Migration, Cache
54

    
55
from calendar import timegm
56
from decimal import Decimal
57
from collections import defaultdict
58

    
59
import json
60
import os
61
import sys
62
import hashlib
63
import mimetypes
64
import time
65
import datetime
66

    
67
(ID, CREATIONDATE, MODIFICATIONDATE, DELETED, ICON, NAME, VERSION, CREATEDBY_ID, MODIFIEDBY_ID, OWNER_ID, PARENT_ID, READFORALL, SHARED, USER) = range(14)
68

    
69
class ObjectMigration(Migration):
70
    def __init__(self, old_db, db, f):
71
        Migration.__init__(self, old_db)
72
        self.cache = Cache(db)
73
    
74
    def create_node(self, username, container, object):
75
        node = self.backend.node.node_lookup(object)
76
        if not node:
77
            parent_path = '%s/%s' %(username, container)
78
            parent_node = self.backend.node.node_lookup(parent_path)
79
            if not parent_node:
80
                raise Exception('Missing node')
81
            node = self.backend.node.node_create(parent_node, object)
82
        return node
83
    
84
    def create_history(self, header_id, node_id, deleted=False):
85
        i = 0
86
        map = HashMap(self.backend.block_size, self.backend.hash_algorithm)
87
        v = []
88
        stored_versions = self.backend.node.node_get_versions(node_id, ['mtime'])
89
        stored_versions_mtime = [datetime.datetime.utcfromtimestamp(elem[0]) for elem in stored_versions]
90
        for t, rowcount  in self.retrieve_node_versions(header_id):
91
            size, modyfied_by, filepath, mimetype, mdate = t
92
            if mdate in stored_versions_mtime:
93
                continue
94
            cluster = CLUSTER_HISTORY if i < rowcount - 1 else CLUSTER_NORMAL
95
            cluster = cluster if not deleted else CLUSTER_DELETED
96
            hash = self.cache.get(filepath)
97
            if hash == None:
98
                raise Exception("Missing hash")
99
            args = node_id, hash, size, modyfied_by, cluster, mimetype, mdate
100
            v.append(self.create_version(*args))
101
            i += 1
102
        return v
103
    
104
    def create_version(self, node_id, hash, size, modyfied_by, cluster, mimetype, mdate):
105
        args = (node_id, hash, size, None, modyfied_by, cluster)
106
        serial = self.backend.node.version_create(*args)[0]
107
        meta = {'hash':hash,
108
                'content-type':mimetype}
109
        self.backend.node.attribute_set(serial, ((k, v) for k, v in meta.iteritems()))
110
        timestamp = timegm(mdate.timetuple())
111
        microseconds = mdate.time().microsecond
112
        values = timestamp, microseconds, serial
113
        f.write('update versions set mtime=\'%10d.%6d\' where serial=%s;' %values)
114
        return serial
115
    
116
    def create_tags(self, header_id, node_id, vserials):
117
        tags = self.retrieve_tags(header_id)
118
        if not tags:
119
            return
120
        for v in vserials:
121
            self.backend.node.attribute_set(v, (('X-Object-Meta-Tag', tags),))
122
    
123
    def create_permissions(self, fid, path, owner, is_folder=True):
124
        fpath, fpermissions = self.backend.permissions.access_inherit(path)
125
        permissions = self.retrieve_permissions(fid, is_folder)
126
        if not fpermissions:
127
            keys = ('read', 'write')
128
            for k in keys:
129
                if owner in permissions[k]:
130
                    permissions[k].remove(owner)
131
            self.backend.permissions.access_set(path, permissions)
132
        else:
133
            keys = ('read', 'write')
134
            common_p = {}
135
            for k in keys:
136
                if owner in permissions[k]:
137
                    permissions[k].remove(owner)
138
                common = set(fpermissions[k]).intersection(set(permissions[k]))
139
                common_p[k] = list(common)
140
            #keep only the common permissions
141
            #trade off for securing access only to explicitly authorized users
142
            self.backend.permissions.access_set(fpath, common_p)
143
    
144
    def create_objects(self):
145
        for t in self.retrieve_current_nodes():
146
            username, headerid, folderid, filename, deleted, filepath, mimetype, public, owner_id = t
147
            containers = ['pithos', 'trash']
148
            
149
            for c in containers:
150
                #create container if it does not exist
151
                try:
152
                    self.backend._lookup_container(username, c)
153
                except NameError, e:
154
                    self.backend.put_container(username, username, c) 
155
            
156
            container = 'pithos' if not deleted else 'trash'
157
            path = self.build_path(folderid)
158
            #create node
159
            object = '%s/%s' %(username, container)
160
            object = '%s/%s/%s' %(object, path, filename) if path else '%s/%s' %(object, filename)
161
            args = username, container, object
162
            nodeid = self.create_node(*args)
163
            #create node history 
164
            vserials = self.create_history(headerid, nodeid, deleted)
165
            #set object tags
166
            self.create_tags(headerid, nodeid, vserials)
167
            #set object's publicity
168
            if public:
169
                self.backend.permissions.public_set(object)
170
            #set object's permissions
171
            self.create_permissions(headerid, object, username, is_folder=False)
172
    
173
    def build_path(self, child_id):
174
        folder = Table('folder', self.metadata, autoload=True)
175
        user = Table('gss_user', self.metadata, autoload=True)
176
        j = folder.join(user, folder.c.owner_id == user.c.id)
177
        s = select([folder, user.c.username], from_obj=j)
178
        s = s.where(folder.c.id == child_id)
179
        s.order_by(folder.c.modificationdate)
180
        rp = self.conn.execute(s)
181
        t = rp.fetchone()
182
        md5 = hashlib.md5()
183
        hash = md5.hexdigest().lower()
184
        size = 0
185
        if not t[PARENT_ID]:
186
            return ''
187
        else:
188
            container_path = t[USER]
189
            container_path += '/trash' if t[DELETED] else '/pithos'
190
            parent_node = self.backend.node.node_lookup(container_path)
191
            if not parent_node:
192
                raise Exception('Missing node:', container_path)
193
            parent_path = self.build_path(t[PARENT_ID])
194
            path = '%s/%s/%s' %(container_path, parent_path, t[NAME]) if parent_path else '%s/%s' %(container_path, t[NAME])
195
            node = self.backend.node.node_lookup(path)
196
            if not node:
197
                node = self.backend.node.node_create(parent_node, path)
198
                if not node:
199
                    raise Exception('Unable to create node:', path)
200
                
201
                #create versions
202
                v = self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[CREATIONDATE])
203
                if t[CREATIONDATE] != t[MODIFICATIONDATE]:
204
                    self.backend.node.version_recluster(v, CLUSTER_HISTORY)
205
                    self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[MODIFICATIONDATE])
206
                
207
                #set permissions
208
                self.create_permissions(t[ID], path, t[USER], is_folder=True)
209
            return '%s/%s' %(parent_path, t[NAME]) if parent_path else t[NAME]
210
    
211
    def retrieve_current_nodes(self):
212
        fileheader = Table('fileheader', self.metadata, autoload=True)
213
        filebody = Table('filebody', self.metadata, autoload=True)
214
        folder = Table('folder', self.metadata, autoload=True)
215
        gss_user = Table('gss_user', self.metadata, autoload=True)
216
        j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
217
        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
218
        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
219
        s = select([gss_user.c.username,  fileheader.c.id, fileheader.c.folder_id,
220
                    fileheader.c.name,  fileheader.c.deleted,
221
                    filebody.c.storedfilepath, filebody.c.mimetype,
222
                    fileheader.c.readforall, fileheader.c.owner_id], from_obj=j)
223
        rp = self.conn.execute(s)
224
        object = rp.fetchone()
225
        while object:
226
            yield object
227
            object = rp.fetchone()
228
        rp.close()
229
    
230
    def retrieve_node_versions(self, header_id):
231
        filebody = Table('filebody', self.metadata, autoload=True)
232
        gss_user = Table('gss_user', self.metadata, autoload=True)
233
        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
234
        s = select([filebody.c.filesize, gss_user.c.username,
235
                    filebody.c.storedfilepath, filebody.c.mimetype,
236
                    filebody.c.modificationdate], from_obj=j)
237
        s = s.where(filebody.c.header_id == header_id)
238
        s = s.order_by(filebody.c.version)
239
        rp = self.conn.execute(s)
240
        version = rp.fetchone()
241
        while version:
242
            yield version, rp.rowcount
243
            version = rp.fetchone()
244
        rp.close()
245
    
246
    def retrieve_tags(self, header_id):
247
        filetag = Table('filetag', self.metadata, autoload=True)
248
        s = select([filetag.c.tag], filetag.c.fileid == header_id)
249
        rp = self.conn.execute(s)
250
        tags = rp.fetchall() if rp.returns_rows else []
251
        tags = [elem[0] for elem in tags]
252
        rp.close()
253
        return ','.join(tags) if tags else ''
254
    
255
    def retrieve_permissions(self, id, is_folder=True):
256
        permissions = {}
257
        if is_folder:
258
            ftable = Table('folder_permission', self.metadata, autoload=True)
259
        else:
260
            ftable = Table('fileheader_permission', self.metadata, autoload=True)
261
        permission = Table('permission', self.metadata, autoload=True)
262
        group = Table('gss_group', self.metadata, autoload=True)
263
        user = Table('gss_user', self.metadata, autoload=True)
264
        j = ftable.join(permission, ftable.c.permissions_id == permission.c.id)
265
        j1 = j.join(group, group.c.id == permission.c.group_id)
266
        j2 = j.join(user, user.c.id == permission.c.user_id)
267
        
268
        permissions = defaultdict(list)
269
        
270
        def _get_permissions(self, action='read', get_groups=True):
271
            if get_groups:
272
                col, j = group.c.name, j1
273
                cond2 = permission.c.group_id != None
274
            else:
275
                col, j = user.c.username, j2
276
                cond2 = permission.c.user_id != None
277
            s = select([col], from_obj=j)
278
            if is_folder:
279
                s = s.where(ftable.c.folder_id == id)
280
            else:
281
                s = s.where(ftable.c.fileheader_id == id)
282
            if action == 'read':
283
                cond1 = permission.c.read == True
284
            else:
285
                cond1 = permission.c.write == True
286
            s = s.where(and_(cond1, cond2))
287
            print '>', s, s.compile().params
288
            rp = self.conn.execute(s)
289
            p = permissions[action].extend([e[0] for e in rp.fetchall()])
290
            rp.close()
291
            return p
292
        
293
        #get object read groups
294
        _get_permissions(self, action='read', get_groups=True)
295
        
296
        #get object read users
297
        _get_permissions(self, action='read', get_groups=False)
298
        
299
        #get object write groups
300
        _get_permissions(self, action='write', get_groups=True)
301
        
302
        #get object write groups
303
        _get_permissions(self, action='write', get_groups=False)
304
        
305
        return permissions
306
    
307
if __name__ == "__main__":
308
    old_db = ''
309
    db = ''
310
    
311
    f = open('fixdates.sql', 'w')
312
    ot = ObjectMigration(old_db, db, f)
313
    ot.create_objects()
314
    f.close()
315
    
316