Revision 2db16f05

b/pithos/backends/modular.py
@@ -86,6 +86,7 @@
         
         __import__(mod)
         self.mod = sys.modules[mod]
+        self.db = db
         self.wrapper = self.mod.dbwrapper.DBWrapper(db)
         
         params = {'blocksize': self.block_size,
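Note: the one-line addition above keeps the database connection string on the backend instance. The migration tool below relies on it when it rebuilds a low-level wrapper from the same URI; a minimal sketch of that dependency (options as in settings.BACKEND[1], see tools/lib/migrate.py below):

    # Sketch only: why ModularBackend now stores self.db.
    backend = ModularBackend(*options)   # options from settings.BACKEND[1]
    wrapper = DBWrapper(backend.db)      # possible only because the URI is kept on self.db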
b/tools/lib/hashmap.py
@@ -70,7 +70,8 @@
             h = [self._hash_raw(h[x] + h[x + 1]) for x in range(0, len(h), 2)]
         return h[0]
     
-    def load(self, f):
-        with open(f) as fp:
-            for block in file_read_iterator(fp, self.blocksize):
-                self.append(self._hash_block(block))
+    def load(self, fp):
+        self.size = 0
+        for block in file_read_iterator(fp, self.blocksize):
+            self.append(self._hash_block(block))
+            self.size += len(block)
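The reworked load() now takes an already-open file object and records the total byte count in self.size while hashing. It iterates with file_read_iterator, which this revision also starts exporting from lib.hashmap (see tools/migrate_db below); a minimal sketch of the behavior it assumes, not necessarily the library's exact code:

    def file_read_iterator(fp, size=4096):
        # Assumed behavior: yield successive chunks of at most `size`
        # bytes from fp until EOF.
        while True:
            data = fp.read(size)
            if not data:
                break
            yield data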
b/tools/lib/migrate.py
@@ -36,6 +36,8 @@
 from sqlalchemy import create_engine
 from sqlalchemy import Table, MetaData
 
+from django.conf import settings
+
 from pithos.backends.modular import ModularBackend
 
 class Migration(object):
@@ -44,6 +46,9 @@
         self.metadata = MetaData(self.engine)
         #self.engine.echo = True
         self.conn = self.engine.connect()
+        
+        options = getattr(settings, 'BACKEND', None)[1]
+        self.backend = ModularBackend(*options)
     
     def execute(self):
         pass
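Migration now builds a ModularBackend from Django settings. The code assumes settings.BACKEND is a pair whose second element is the positional-argument tuple for ModularBackend; a hypothetical entry (values and first element invented for illustration, the real tuple must match ModularBackend.__init__):

    BACKEND = ('pithos.backends.modular.ModularBackend',
               ('sqlite:///backend.db', '/srv/pithos/data'))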
b/tools/lib/transfer.py
@@ -40,7 +40,7 @@
 from client import Fault
 
 
-def upload(client, file, container, prefix, name=None):
+def upload(client, file, container, prefix, name=None, mimetype=None):
     
     meta = client.retrieve_container_metadata(container)
     blocksize = int(meta['x-container-block-size'])
@@ -48,18 +48,20 @@
     
     size = os.path.getsize(file)
     hashes = HashMap(blocksize, blockhash)
-    hashes.load(file)
+    hashes.load(open(file))
     map = {'bytes': size, 'hashes': [hexlify(x) for x in hashes]}
     
     objectname = name if name else os.path.split(file)[-1]
     object = prefix + objectname
+    kwargs = {'mimetype':mimetype} if mimetype else {}
+    v = None
     try:
-        client.create_object_by_hashmap(container, object, map)
+        v = client.create_object_by_hashmap(container, object, map, **kwargs)
     except Fault, fault:
         if fault.status != 409:
             raise
     else:
-        return
+        return v
     
     if type(fault.data) == types.StringType:
         missing = fault.data.split('\n')
@@ -76,7 +78,7 @@
             block = fp.read(blocksize)
             client.update_container_data(container, StringIO(block))
     
-    client.create_object_by_hashmap(container, object, map)
+    return client.create_object_by_hashmap(container, object, map, **kwargs)
 
 def download(client, container, object, file):
     
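upload() now accepts an optional mimetype, forwards it to create_object_by_hashmap(), and returns the version serial it gets back. Hypothetical call sites (paths and names invented; `client` is anything exposing the methods upload() calls, e.g. the ClientWrapper in tools/migrate_db below):

    serial = upload(client, '/tmp/report.pdf', 'pithos', 'docs/',
                    mimetype='application/pdf')
    serial = upload(client, '/tmp/notes.txt', 'pithos', '')  # no content-type set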
b/tools/migrate-data
@@ -46,14 +46,17 @@
 from lib.migrate import Migration
 
 class DataMigration(Migration):
-    def __init__(self, db):
-        Migration.__init__(self, db)
+    def __init__(self, pithosdb, db):
+        Migration.__init__(self, pithosdb)
         # XXX Need more columns for primary key - last modified timestamp...
+        engine = create_engine(db)
+        metadata = MetaData(engine)
+        
         columns=[]
         columns.append(Column('path', String(2048), primary_key=True))
         columns.append(Column('hash', String(255)))
-        self.files = Table('files', self.metadata, *columns)
-        self.metadata.create_all(self.engine)
+        self.files = Table('files', metadata, *columns)
+        metadata.create_all(engine)
     
     def cache_put(self, path, hash):
         # Insert or replace.
@@ -108,7 +111,8 @@
             print status
 
 if __name__ == "__main__":
+    pithosdb = ''
     db = 'sqlite:///migrate.db'
     
-    dt = DataMigration(db)
+    dt = DataMigration(pithosdb, db)
     dt.execute()
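cache_put() (body elided above) is documented as "insert or replace" against the files table created in __init__. One way to express that on the SQLite cache, shown only as a sketch and not necessarily the elided implementation:

    from sqlalchemy.sql import text

    def cache_put_sketch(conn, path, hash):
        # SQLite's INSERT OR REPLACE upserts on the primary key (path).
        s = text("INSERT OR REPLACE INTO files (path, hash) VALUES (:p, :h)")
        conn.execute(s, p=path, h=hash)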
b/tools/migrate_db
@@ -39,14 +39,17 @@
 from binascii import hexlify
 
 from pithos.backends.lib.hashfiler import Blocker
+from pithos.backends.lib.sqlalchemy import Node
 from pithos.aai.models import PithosUser
 
 from django.conf import settings
 
-from pithos.backends.modular import ModularBackend
+from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
+from pithos.backends.lib.sqlalchemy.node import Node
+from pithos.backends.lib.sqlalchemy.dbwrapper import DBWrapper
 
 from lib.transfer import upload
-from lib.hashmap import HashMap
+from lib.hashmap import HashMap, file_read_iterator
 from lib.client import Fault
 from lib.migrate import Migration
 
@@ -54,12 +57,15 @@
 import os
 import sys
 import hashlib
+import mimetypes
 
-class ObjectMigration(DataMigration):
-    def __init__(self, db, path, block_size, hash_algorithm):
-        DataMigration.__init__(self, db, path, block_size, hash_algorithm)
-        self.wrapper = ClientWrapper()
-    
+class ObjectMigration(Migration):
+    def __init__(self, old_db):
+        Migration.__init__(self, old_db)
+        self.wrapper = ClientWrapper(self.backend)
+        params = {'wrapper': DBWrapper(self.backend.db)}
+        self.node = Node(**params)
+        
     def create_default_containers(self):
         users = PithosUser.objects.all()
         for u in users:
@@ -81,41 +87,88 @@
         else:
             return '%s/%s' %(self.get_path(parent_id), foldername)
     
+    def create_object(self, username, container, object, filepath, mimetype):
+        obj = ''
+        path = '/'.join(object.split('/')[:-1])
+        name = object.split('/')[-1]
+        #create directory markers
+        for f in path.split('/'):
+            obj = '%s/%s' %(obj, f) if obj else f
+            try:
+                self.wrapper.create_directory_marker('pithos', obj, username)
+            except NameError, e:
+                pass
+        self.wrapper.set_account(username)
+        
+        prefix = '%s/' %path if path else ''
+        print '#', filepath, container, prefix, name, mimetype
+        return upload(self.wrapper, filepath, container, prefix, name, mimetype)
+    
+    def create_history(self, user, header_id, node_id, deleted=False):
+        filebody = Table('filebody', self.metadata, autoload=True)
+        gss_user = Table('gss_user', self.metadata, autoload=True)
+        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
+        s = select([filebody.c.filesize, gss_user.c.username], from_obj=j)
+        s = s.where(filebody.c.header_id == header_id)
+        s = s.order_by(filebody.c.version)
+        rp = self.conn.execute(s)
+        versions = rp.fetchall()
+        print '#', len(versions)
+        rp.close()
+        i = 0
+        for size, modified_by in versions:
+            cluster = CLUSTER_HISTORY if i < len(versions) - 1 else CLUSTER_NORMAL
+            cluster = cluster if not deleted else CLUSTER_DELETED
+            args = (node_id, size, None, modified_by, cluster)
+            self.node.version_create(*args)
+            i += 1
+    
     def create_objects(self):
         fileheader = Table('fileheader', self.metadata, autoload=True)
         filebody = Table('filebody', self.metadata, autoload=True)
         folder = Table('folder', self.metadata, autoload=True)
         gss_user = Table('gss_user', self.metadata, autoload=True)
-        j = filebody.join(fileheader, filebody.c.header_id == fileheader.c.id)
+        j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
         j = j.join(folder, fileheader.c.folder_id == folder.c.id)
         j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
-        s = select([gss_user.c.username,  fileheader.c.folder_id, fileheader.c.name,
-                    filebody.c.storedfilepath], from_obj=j)
+        s = select([gss_user.c.username, fileheader.c.id, fileheader.c.folder_id,
+                    fileheader.c.name, fileheader.c.deleted, filebody.c.storedfilepath,
+                    filebody.c.mimetype], from_obj=j)
         rp = self.conn.execute(s)
         objects = rp.fetchall()
-        for username, folderid, filename, filepath in objects:
+        for username, headerid, folderid, filename, deleted, filepath, mimetype in objects:
             path = self.get_path(folderid)[1:]
-            obj = ''
-            #create directory markers
-            for f in path.split('/'):
-                obj = '%s/%s' %(obj, f) if obj else f
-                try:
-                    self.wrapper.create_directory_marker('pithos', obj, username)
-                except NameError, e:
-                    pass
-            self.wrapper.set_account(username)
-            
-            print '#', username, path, filename
-            prefix = '%s/' %path if path else ''
-            upload(self.wrapper, filepath, 'pithos', prefix, filename)
+            container = 'pithos' if not deleted else 'trash'
+            object = '%s/%s' %(path, filename)
+            #filepath = '/Users/butters/Downloads/torvalds-linux-0f86267'
+            vserial = self.create_object(username, container, object, filepath, mimetype)
+            nodeid = self.node.version_get_properties(vserial, keys=('node',))[0]
+            self.create_history(username, headerid, nodeid, deleted)
+            self.node.version_remove(vserial)
+            #self.set_metadata()
+            #self.set_public()
+            #self.statistics()
+            #self.set_permissions()
+    
+    def handle_deleted(self):
+        pass
+    
+    def upload_dir(self, dir, prefix, user, container):
+        for f in os.listdir(dir):
+            fullpath = '%s/%s' %(dir, f)
+            if os.path.isfile(fullpath):
+                type = mimetypes.guess_type(fullpath)[0]
+                name = '/'.join(fullpath.split(prefix)[1:])
+                print '@', user, container, name, fullpath, type
+                self.create_object(user, container, name, fullpath, type)
+            else: self.upload_dir(fullpath, prefix, user, container)
 
 class ClientWrapper(object):
     """Wraps client methods used by transfer.upload()
     to ModularBackend methods"""
     
-    def __init__(self):
-        options = getattr(settings, 'BACKEND', None)[1]
-        self.backend = ModularBackend(*options)
+    def __init__(self, backend):
+        self.backend = backend
         self.block_size = self.backend.block_size
         self.block_hash = self.backend.hash_algorithm
     
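The cluster assignment in create_history() above, restated: every version but the newest goes to CLUSTER_HISTORY, the newest to CLUSTER_NORMAL, and all of them to CLUSTER_DELETED when the file was deleted. As a standalone helper with the same logic (constants as imported from pithos.backends.modular above):

    def pick_cluster(i, n, deleted):
        # i: zero-based version index, n: number of versions
        if deleted:
            return CLUSTER_DELETED
        return CLUSTER_HISTORY if i < n - 1 else CLUSTER_NORMAL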
@@ -131,42 +184,38 @@
                 'hash':  md5.hexdigest().lower()}
         self.backend.update_object_hashmap(account, account, container, object, 0, [], meta)
     
-    def create_object_by_hashmap(self, container, object, map):
+    def create_object_by_hashmap(self, container, object, map, mimetype=None):
         hashmap = HashMap(self.block_size, self.block_hash)
-        for hash in map['hashes']:
-            hashmap.append(hash)
+        for h in map['hashes']:
+            hashmap.append(h)
         meta = {'hash':hexlify(hashmap.hash())}
+        if mimetype:
+            meta['content-type'] = mimetype
         size = map['bytes']
         try:
             args = [self.account, self.account, container, object, size,  map['hashes'], meta]
-            self.backend.update_object_hashmap(*args)
+            return self.backend.update_object_hashmap(*args)
         except IndexError, ie:
             fault = Fault(ie.data, 409)
             raise fault
     
-    def create_object(self, container, object, f):
-        hashmap = HashMap(self.block_size, self.block_hash)
-        hashmap.load(f)
-        map =  [hexlify(x) for x in hashmap]
-        meta = {'hash':hashmap.hash()}
-        size = hashmap.size
-        self.backend.update_object_hashmap(self.account, self.account, container, object, size,  hashmap, meta)
+    def update_container_data(self, container, f):
+        #just put the blocks
+        for block in file_read_iterator(f, self.block_size):
+            self.backend.put_block(block)
     
     def retrieve_container_metadata(self, container):
         return {'x-container-block-size':self.block_size,
                 'x-container-block-hash':self.block_hash}
-    
+
 if __name__ == "__main__":
-    db = ''
+    old_db = ''
     
-    basepath = options = getattr(settings, 'PROJECT_PATH', None)
-    params = {'db':db,
-              'path':os.path.join(basepath, 'data/pithos/'),
-              'block_size':(4 * 1024 * 1024),
-              'hash_algorithm':'sha256'}
+    ot = ObjectMigration(old_db)
+    #ot.create_default_containers()
+    #ot.create_objects()
     
-    ot = ObjectMigration(**params)
-    ot.create_default_containers()
-    ot.create_objects()
+    p = ''
+    ot.upload_dir(p, p, 'chstath', 'linux')
     
     
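The directory-marker loop in create_object() expands an object path into every ancestor prefix, so each level of the hierarchy gets its own marker. Traced on a made-up path:

    obj = ''
    for f in 'docs/reports/2011'.split('/'):
        obj = '%s/%s' % (obj, f) if obj else f
        print obj
    # prints: docs, then docs/reports, then docs/reports/2011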
