Revision 2db16f05 tools/migrate_db

b/tools/migrate_db
39 39
from binascii import hexlify
40 40

  
41 41
from pithos.backends.lib.hashfiler import Blocker
42
from pithos.backends.lib.sqlalchemy import Node
42 43
from pithos.aai.models import PithosUser
43 44

  
44 45
from django.conf import settings
45 46

  
46
from pithos.backends.modular import ModularBackend
47
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
48
from pithos.backends.lib.sqlalchemy.node import Node
49
from pithos.backends.lib.sqlalchemy.dbwrapper import DBWrapper
47 50

  
48 51
from lib.transfer import upload
49
from lib.hashmap import HashMap
52
from lib.hashmap import HashMap, file_read_iterator
50 53
from lib.client import Fault
51 54
from lib.migrate import Migration
52 55

  
......
54 57
import os
55 58
import sys
56 59
import hashlib
60
import mimetypes
57 61

  
58
class ObjectMigration(DataMigration):
59
    def __init__(self, db, path, block_size, hash_algorithm):
60
        DataMigration.__init__(self, db, path, block_size, hash_algorithm)
61
        self.wrapper = ClientWrapper()
62
    
62
class ObjectMigration(Migration):
63
    def __init__(self, old_db):
64
        Migration.__init__(self, old_db)
65
        self.wrapper = ClientWrapper(self.backend)
66
        params = {'wrapper': DBWrapper(self.backend.db)}
67
        self.node = Node(**params)
68
        
63 69
    def create_default_containers(self):
64 70
        users = PithosUser.objects.all()
65 71
        for u in users:
......
81 87
        else:
82 88
            return '%s/%s' %(self.get_path(parent_id), foldername)
83 89
    
90
    def create_object(self, username, container, object, filepath, mimetype):
91
        obj = ''
92
        path = '/'.join(object.split('/')[:-1])
93
        name =  object.split('/')[-1]
94
        #create directory markers
95
        for f in path.split('/'):
96
            obj = '%s/%s' %(obj, f) if obj else f
97
            try:
98
                self.wrapper.create_directory_marker('pithos', obj, username)
99
            except NameError, e:
100
                pass
101
        self.wrapper.set_account(username)
102
                
103
        prefix = '%s/' %path if path else ''
104
        print '#', filepath, container, prefix, name, mimetype
105
        return upload(self.wrapper, filepath, container, prefix, name, mimetype)
106
    
107
    def create_history(self, user, header_id, node_id, deleted=False):
108
        filebody = Table('filebody', self.metadata, autoload=True)
109
        gss_user = Table('gss_user', self.metadata, autoload=True)
110
        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
111
        s = select([filebody.c.filesize, gss_user.c.username], from_obj=j)
112
        s = s.where(filebody.c.header_id == header_id)
113
        s = s.order_by(filebody.c.version)
114
        rp = self.conn.execute(s)
115
        versions = rp.fetchall()
116
        print '#', len(versions)
117
        rp.close()
118
        i = 0
119
        for size, modyfied_by  in versions:
120
            cluster = CLUSTER_HISTORY if i < len(versions) - 1 else CLUSTER_NORMAL
121
            cluster = cluster if not deleted else CLUSTER_DELETED
122
            args = (node_id, size, None, modyfied_by, cluster)
123
            self.node.version_create(*args)
124
            i += 1
125
    
84 126
    def create_objects(self):
85 127
        fileheader = Table('fileheader', self.metadata, autoload=True)
86 128
        filebody = Table('filebody', self.metadata, autoload=True)
87 129
        folder = Table('folder', self.metadata, autoload=True)
88 130
        gss_user = Table('gss_user', self.metadata, autoload=True)
89
        j = filebody.join(fileheader, filebody.c.header_id == fileheader.c.id)
131
        j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
90 132
        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
91 133
        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
92
        s = select([gss_user.c.username,  fileheader.c.folder_id, fileheader.c.name,
93
                    filebody.c.storedfilepath], from_obj=j)
134
        s = select([gss_user.c.username,  fileheader.c.id, fileheader.c.folder_id,
135
                    fileheader.c.name,  fileheader.c.deleted, filebody.c.storedfilepath,
136
                    filebody.c.mimetype], from_obj=j)
94 137
        rp = self.conn.execute(s)
95 138
        objects = rp.fetchall()
96
        for username, folderid, filename, filepath in objects:
139
        for username, headerid, folderid, filename, deleted, filepath, mimetype in objects:
97 140
            path = self.get_path(folderid)[1:]
98
            obj = ''
99
            #create directory markers
100
            for f in path.split('/'):
101
                obj = '%s/%s' %(obj, f) if obj else f
102
                try:
103
                    self.wrapper.create_directory_marker('pithos', obj, username)
104
                except NameError, e:
105
                    pass
106
            self.wrapper.set_account(username)
107
            
108
            print '#', username, path, filename
109
            prefix = '%s/' %path if path else ''
110
            upload(self.wrapper, filepath, 'pithos', prefix, filename)
141
            container = 'pithos' if not deleted else 'trash'
142
            object = '%s/%s' %(path, filename)
143
            #filepath = '/Users/butters/Downloads/torvalds-linux-0f86267'
144
            vserial = self.create_object(username, container, object, filepath, mimetype)
145
            nodeid = self.node.version_get_properties(vserial, keys=('node',))[0]
146
            self.create_history(username, headerid, nodeid, deleted)
147
            self.node.version_remove(vserial)
148
            #self.set_metadata()
149
            #self.set_public()
150
            #self.statistics()
151
            #self.set_permissions()
152
    
153
    def handle_deleted(self):
154
        pass
155
    
156
    def upload_dir(self, dir, prefix, user, container):
157
        for f in os.listdir(dir):
158
            fullpath = '%s/%s' %(dir, f)
159
            if os.path.isfile(fullpath):
160
                type = mimetypes.guess_type(fullpath)[0]
161
                name = '/'.join(fullpath.split(prefix)[1:])
162
                print '@', user, container, name, fullpath, type
163
                self.create_object(user, container, name, fullpath, type)
164
            else: self.upload_dir(fullpath, prefix, user, container)
111 165

  
112 166
class ClientWrapper(object):
113 167
    """Wraps client methods used by transfer.upload()
114 168
    to ModularBackend methods"""
115 169
    
116
    def __init__(self):
117
        options = getattr(settings, 'BACKEND', None)[1]
118
        self.backend = ModularBackend(*options)
170
    def __init__(self, backend):
171
        self.backend = backend
119 172
        self.block_size = self.backend.block_size
120 173
        self.block_hash = self.backend.hash_algorithm
121 174
    
......
131 184
                'hash':  md5.hexdigest().lower()}
132 185
        self.backend.update_object_hashmap(account, account, container, object, 0, [], meta)   
133 186
    
134
    def create_object_by_hashmap(self, container, object, map):
187
    def create_object_by_hashmap(self, container, object, map, mimetype=None):
135 188
        hashmap = HashMap(self.block_size, self.block_hash)
136
        for hash in map['hashes']:
137
            hashmap.append(hash)
189
        for h in map['hashes']:
190
            hashmap.append(h)
138 191
        meta = {'hash':hexlify(hashmap.hash())}
192
        if mimetype:
193
            meta['content-type'] = mimetype
139 194
        size = map['bytes']
140 195
        try:
141 196
            args = [self.account, self.account, container, object, size,  map['hashes'], meta]
142
            self.backend.update_object_hashmap(*args)
197
            return self.backend.update_object_hashmap(*args)
143 198
        except IndexError, ie:
144 199
            fault = Fault(ie.data, 409)
145 200
            raise fault
146 201
    
147
    def create_object(self, container, object, f):
148
        hashmap = HashMap(self.block_size, self.block_hash)
149
        hashmap.load(f)
150
        map =  [hexlify(x) for x in hashmap]
151
        meta = {'hash':hashmap.hash()}
152
        size = hashmap.size
153
        self.backend.update_object_hashmap(self.account, self.account, container, object, size,  hashmap, meta)
202
    def update_container_data(self, container, f):
203
        #just put the blocks
204
        for block in file_read_iterator(f, self.block_size):
205
            self.backend.put_block(block)
154 206
    
155 207
    def retrieve_container_metadata(self, container):
156 208
        return {'x-container-block-size':self.block_size,
157 209
                'x-container-block-hash':self.block_hash}
158
    
210

  
159 211
if __name__ == "__main__":
160
    db = ''
212
    old_db = ''
161 213
    
162
    basepath = options = getattr(settings, 'PROJECT_PATH', None)
163
    params = {'db':db,
164
              'path':os.path.join(basepath, 'data/pithos/'),
165
              'block_size':(4 * 1024 * 1024),
166
              'hash_algorithm':'sha256'}
214
    ot = ObjectMigration(old_db)
215
    #ot.create_default_containers()
216
    #ot.create_objects()
167 217
    
168
    ot = ObjectMigration(**params)
169
    ot.create_default_containers()
170
    ot.create_objects()
218
    p = ''
219
    ot.upload_dir(p, p, 'chstath', 'linux')
171 220
    
172 221
    

Also available in: Unified diff