Migration Tools: Progress III
[pithos] / tools / migrate_db
index 1e9a499..2b4a035 100755 (executable)
@@ -39,14 +39,17 @@ from sqlalchemy.sql import select
 from binascii import hexlify
 
 from pithos.backends.lib.hashfiler import Blocker
+from pithos.backends.lib.sqlalchemy import Node
 from pithos.aai.models import PithosUser
 
 from django.conf import settings
 
-from pithos.backends.modular import ModularBackend
+from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
+from pithos.backends.lib.sqlalchemy.node import Node
+from pithos.backends.lib.sqlalchemy.dbwrapper import DBWrapper
 
 from lib.transfer import upload
-from lib.hashmap import HashMap
+from lib.hashmap import HashMap, file_read_iterator
 from lib.client import Fault
 from lib.migrate import Migration
 
@@ -54,12 +57,15 @@ import json
 import os
 import sys
 import hashlib
+import mimetypes
 
-class ObjectMigration(DataMigration):
-    def __init__(self, db, path, block_size, hash_algorithm):
-        DataMigration.__init__(self, db, path, block_size, hash_algorithm)
-        self.wrapper = ClientWrapper()
-    
+class ObjectMigration(Migration):
+    def __init__(self, old_db):
+        Migration.__init__(self, old_db)
+        self.wrapper = ClientWrapper(self.backend)
+        params = {'wrapper': DBWrapper(self.backend.db)}
+        self.node = Node(**params)
+        
     def create_default_containers(self):
         users = PithosUser.objects.all()
         for u in users:
@@ -81,41 +87,88 @@ class ObjectMigration(DataMigration):
         else:
             return '%s/%s' %(self.get_path(parent_id), foldername)
     
+    def create_object(self, username, container, object, filepath, mimetype):
+        obj = ''
+        path = '/'.join(object.split('/')[:-1])
+        name =  object.split('/')[-1]
+        #create directory markers
+        for f in path.split('/'):
+            obj = '%s/%s' %(obj, f) if obj else f
+            try:
+                self.wrapper.create_directory_marker('pithos', obj, username)
+            except NameError, e:
+                pass
+        self.wrapper.set_account(username)
+                
+        prefix = '%s/' %path if path else ''
+        print '#', filepath, container, prefix, name, mimetype
+        return upload(self.wrapper, filepath, container, prefix, name, mimetype)
+    
+    def create_history(self, user, header_id, node_id, deleted=False):
+        filebody = Table('filebody', self.metadata, autoload=True)
+        gss_user = Table('gss_user', self.metadata, autoload=True)
+        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
+        s = select([filebody.c.filesize, gss_user.c.username], from_obj=j)
+        s = s.where(filebody.c.header_id == header_id)
+        s = s.order_by(filebody.c.version)
+        rp = self.conn.execute(s)
+        versions = rp.fetchall()
+        print '#', len(versions)
+        rp.close()
+        i = 0
+        for size, modyfied_by  in versions:
+            cluster = CLUSTER_HISTORY if i < len(versions) - 1 else CLUSTER_NORMAL
+            cluster = cluster if not deleted else CLUSTER_DELETED
+            args = (node_id, size, None, modyfied_by, cluster)
+            self.node.version_create(*args)
+            i += 1
+    
     def create_objects(self):
         fileheader = Table('fileheader', self.metadata, autoload=True)
         filebody = Table('filebody', self.metadata, autoload=True)
         folder = Table('folder', self.metadata, autoload=True)
         gss_user = Table('gss_user', self.metadata, autoload=True)
-        j = filebody.join(fileheader, filebody.c.header_id == fileheader.c.id)
+        j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
         j = j.join(folder, fileheader.c.folder_id == folder.c.id)
         j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
-        s = select([gss_user.c.username,  fileheader.c.folder_id, fileheader.c.name,
-                    filebody.c.storedfilepath], from_obj=j)
+        s = select([gss_user.c.username,  fileheader.c.id, fileheader.c.folder_id,
+                    fileheader.c.name,  fileheader.c.deleted, filebody.c.storedfilepath,
+                    filebody.c.mimetype], from_obj=j)
         rp = self.conn.execute(s)
         objects = rp.fetchall()
-        for username, folderid, filename, filepath in objects:
+        for username, headerid, folderid, filename, deleted, filepath, mimetype in objects:
             path = self.get_path(folderid)[1:]
-            obj = ''
-            #create directory markers
-            for f in path.split('/'):
-                obj = '%s/%s' %(obj, f) if obj else f
-                try:
-                    self.wrapper.create_directory_marker('pithos', obj, username)
-                except NameError, e:
-                    pass
-            self.wrapper.set_account(username)
-            
-            print '#', username, path, filename
-            prefix = '%s/' %path if path else ''
-            upload(self.wrapper, filepath, 'pithos', prefix, filename)
+            container = 'pithos' if not deleted else 'trash'
+            object = '%s/%s' %(path, filename)
+            #filepath = '/Users/butters/Downloads/torvalds-linux-0f86267'
+            vserial = self.create_object(username, container, object, filepath, mimetype)
+            nodeid = self.node.version_get_properties(vserial, keys=('node',))[0]
+            self.create_history(username, headerid, nodeid, deleted)
+            self.node.version_remove(vserial)
+            #self.set_metadata()
+            #self.set_public()
+            #self.statistics()
+            #self.set_permissions()
+    
+    def handle_deleted(self):
+        pass
+    
+    def upload_dir(self, dir, prefix, user, container):
+        for f in os.listdir(dir):
+            fullpath = '%s/%s' %(dir, f)
+            if os.path.isfile(fullpath):
+                type = mimetypes.guess_type(fullpath)[0]
+                name = '/'.join(fullpath.split(prefix)[1:])
+                print '@', user, container, name, fullpath, type
+                self.create_object(user, container, name, fullpath, type)
+            else: self.upload_dir(fullpath, prefix, user, container)
 
 class ClientWrapper(object):
     """Wraps client methods used by transfer.upload()
     to ModularBackend methods"""
     
-    def __init__(self):
-        options = getattr(settings, 'BACKEND', None)[1]
-        self.backend = ModularBackend(*options)
+    def __init__(self, backend):
+        self.backend = backend
         self.block_size = self.backend.block_size
         self.block_hash = self.backend.hash_algorithm
     
@@ -131,42 +184,38 @@ class ClientWrapper(object):
                 'hash':  md5.hexdigest().lower()}
         self.backend.update_object_hashmap(account, account, container, object, 0, [], meta)   
     
-    def create_object_by_hashmap(self, container, object, map):
+    def create_object_by_hashmap(self, container, object, map, mimetype=None):
         hashmap = HashMap(self.block_size, self.block_hash)
-        for hash in map['hashes']:
-            hashmap.append(hash)
+        for h in map['hashes']:
+            hashmap.append(h)
         meta = {'hash':hexlify(hashmap.hash())}
+        if mimetype:
+            meta['content-type'] = mimetype
         size = map['bytes']
         try:
             args = [self.account, self.account, container, object, size,  map['hashes'], meta]
-            self.backend.update_object_hashmap(*args)
+            return self.backend.update_object_hashmap(*args)
         except IndexError, ie:
             fault = Fault(ie.data, 409)
             raise fault
     
-    def create_object(self, container, object, f):
-        hashmap = HashMap(self.block_size, self.block_hash)
-        hashmap.load(f)
-        map =  [hexlify(x) for x in hashmap]
-        meta = {'hash':hashmap.hash()}
-        size = hashmap.size
-        self.backend.update_object_hashmap(self.account, self.account, container, object, size,  hashmap, meta)
+    def update_container_data(self, container, f):
+        #just put the blocks
+        for block in file_read_iterator(f, self.block_size):
+            self.backend.put_block(block)
     
     def retrieve_container_metadata(self, container):
         return {'x-container-block-size':self.block_size,
                 'x-container-block-hash':self.block_hash}
-    
+
 if __name__ == "__main__":
-    db = ''
+    old_db = ''
     
-    basepath = options = getattr(settings, 'PROJECT_PATH', None)
-    params = {'db':db,
-              'path':os.path.join(basepath, 'data/pithos/'),
-              'block_size':(4 * 1024 * 1024),
-              'hash_algorithm':'sha256'}
+    ot = ObjectMigration(old_db)
+    #ot.create_default_containers()
+    #ot.create_objects()
     
-    ot = ObjectMigration(**params)
-    ot.create_default_containers()
-    ot.create_objects()
+    p = ''
+    ot.upload_dir(p, p, 'chstath', 'linux')
     
     
\ No newline at end of file