Migration Tools: Progress II
author Sofia Papagiannaki <papagian@gmail.com>
Thu, 29 Sep 2011 12:15:20 +0000 (15:15 +0300)
committer Sofia Papagiannaki <papagian@gmail.com>
Thu, 29 Sep 2011 12:15:20 +0000 (15:15 +0300)
Refs #1171

tools/lib/migrate.py [new file with mode: 0644]
tools/lib/transfer.py
tools/migrate-data
tools/migrate_db [new file with mode: 0755]
tools/migrate_users [new file with mode: 0755]
tools/migration [deleted file]

diff --git a/tools/lib/migrate.py b/tools/lib/migrate.py
new file mode 100644 (file)
index 0000000..453f4a1
--- /dev/null
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+# Copyright 2011 GRNET S.A. All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or
+# without modification, are permitted provided that the following
+# conditions are met:
+# 
+#   1. Redistributions of source code must retain the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer.
+# 
+#   2. Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+# 
+# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+# 
+# The views and conclusions contained in the software and
+# documentation are those of the authors and should not be
+# interpreted as representing official policies, either expressed
+# or implied, of GRNET S.A.
+
+from sqlalchemy import create_engine
+from sqlalchemy import Table, MetaData
+
+from pithos.backends.modular import ModularBackend
+
+class Migration(object):
+    def __init__(self, db):
+        self.engine = create_engine(db)
+        self.metadata = MetaData(self.engine)
+        #self.engine.echo = True
+        self.conn = self.engine.connect()
+    
+    def execute(self):
+        pass
\ No newline at end of file
index cd3548b..3456fe1 100644 (file)
@@ -32,6 +32,7 @@
 # or implied, of GRNET S.A.
 
 import os
+import types
 
 from hashmap import HashMap
 from binascii import hexlify, unhexlify
@@ -39,7 +40,7 @@ from cStringIO import StringIO
 from client import Fault
 
 
-def upload(client, file, container, prefix):
+def upload(client, file, container, prefix, name=None):
     
     meta = client.retrieve_container_metadata(container)
     blocksize = int(meta['x-container-block-size'])
@@ -50,7 +51,8 @@ def upload(client, file, container, prefix):
     hashes.load(file)
     map = {'bytes': size, 'hashes': [hexlify(x) for x in hashes]}
     
-    object = prefix + os.path.split(file)[-1]
+    objectname = name if name else os.path.split(file)[-1]
+    object = prefix + objectname
     try:
         client.create_object_by_hashmap(container, object, map)
     except Fault, fault:
@@ -59,7 +61,11 @@ def upload(client, file, container, prefix):
     else:
         return
     
-    missing = fault.data.split('\n')
+    if type(fault.data) == types.StringType:
+        missing = fault.data.split('\n')
+    elif type(fault.data) == types.ListType:
+        missing = fault.data
+    
     if '' in missing:
         del missing[missing.index(''):]
     
index 4e93d33..490d87e 100755 (executable)
@@ -43,20 +43,7 @@ from pithos import settings
 from pithos.backends.modular import ModularBackend
 
 from lib.hashmap import HashMap
-
-
-class Migration(object):
-    def __init__(self, db):
-        self.engine = create_engine(db)
-        self.metadata = MetaData(self.engine)
-        #self.engine.echo = True
-        self.conn = self.engine.connect()
-        
-        options = getattr(settings, 'BACKEND', None)[1]
-        self.backend = ModularBackend(*options)
-    
-    def execute(self):
-        pass
+from lib.migrate import Migration
 
 class DataMigration(Migration):
     def __init__(self, db):
@@ -91,7 +78,13 @@ class DataMigration(Migration):
         blockhash = self.backend.hash_algorithm
         
         # Loop for all available files.
-        for path in ['README', 'store', 'test']:
+        filebody = Table('filebody', self.metadata, autoload=True)
+        s = select([filebody.c.storedfilepath])
+        rp = self.conn.execute(s)
+        paths = rp.fetchall()
+        rp.close()
+        
+        for path in paths:
             map = HashMap(blocksize, blockhash)
             map.load(path)
             hash = hexlify(map.hash())
diff --git a/tools/migrate_db b/tools/migrate_db
new file mode 100755 (executable)
index 0000000..1e9a499
--- /dev/null
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+
+# Copyright 2011 GRNET S.A. All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or
+# without modification, are permitted provided that the following
+# conditions are met:
+# 
+#   1. Redistributions of source code must retain the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer.
+# 
+#   2. Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+# 
+# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+# 
+# The views and conclusions contained in the software and
+# documentation are those of the authors and should not be
+# interpreted as representing official policies, either expressed
+# or implied, of GRNET S.A.
+
+from sqlalchemy import Table
+from sqlalchemy.sql import select
+
+from binascii import hexlify
+
+from pithos.backends.lib.hashfiler import Blocker
+from pithos.aai.models import PithosUser
+
+from django.conf import settings
+
+from pithos.backends.modular import ModularBackend
+
+from lib.transfer import upload
+from lib.hashmap import HashMap
+from lib.client import Fault
+from lib.migrate import Migration
+
+import json
+import os
+import sys
+import hashlib
+
+class ObjectMigration(DataMigration):
+    def __init__(self, db, path, block_size, hash_algorithm):
+        DataMigration.__init__(self, db, path, block_size, hash_algorithm)
+        self.wrapper = ClientWrapper()
+    
+    def create_default_containers(self):
+        users = PithosUser.objects.all()
+        for u in users:
+            print '#', u.uniq
+            try:
+                self.wrapper.create_container('pithos', u.uniq)
+                self.wrapper.create_container('trash', u.uniq)
+            except NameError, e:
+                pass
+    
+    def get_path(self, child_id):
+        folderTable = Table('folder', self.metadata, autoload=True)
+        s = select([folderTable.c.parent_id, folderTable.c.name])
+        s = s.where(folderTable.c.id == child_id)
+        rp = self.conn.execute(s)
+        parent_id, foldername = rp.fetchone()
+        if not parent_id:
+            return ''
+        else:
+            return '%s/%s' %(self.get_path(parent_id), foldername)
+    
+    def create_objects(self):
+        fileheader = Table('fileheader', self.metadata, autoload=True)
+        filebody = Table('filebody', self.metadata, autoload=True)
+        folder = Table('folder', self.metadata, autoload=True)
+        gss_user = Table('gss_user', self.metadata, autoload=True)
+        j = filebody.join(fileheader, filebody.c.header_id == fileheader.c.id)
+        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
+        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
+        s = select([gss_user.c.username,  fileheader.c.folder_id, fileheader.c.name,
+                    filebody.c.storedfilepath], from_obj=j)
+        rp = self.conn.execute(s)
+        objects = rp.fetchall()
+        for username, folderid, filename, filepath in objects:
+            path = self.get_path(folderid)[1:]
+            obj = ''
+            #create directory markers
+            for f in path.split('/'):
+                obj = '%s/%s' %(obj, f) if obj else f
+                try:
+                    self.wrapper.create_directory_marker('pithos', obj, username)
+                except NameError, e:
+                    pass
+            self.wrapper.set_account(username)
+            
+            print '#', username, path, filename
+            prefix = '%s/' %path if path else ''
+            upload(self.wrapper, filepath, 'pithos', prefix, filename)
+
+class ClientWrapper(object):
+    """Wraps client methods used by transfer.upload()
+    to ModularBackend methods"""
+    
+    def __init__(self):
+        options = getattr(settings, 'BACKEND', None)[1]
+        self.backend = ModularBackend(*options)
+        self.block_size = self.backend.block_size
+        self.block_hash = self.backend.hash_algorithm
+    
+    def set_account(self, account):
+        self.account = account
+    
+    def create_container(self, container, account=None, **meta):
+        self.backend.put_container(account, account, container, meta)
+    
+    def create_directory_marker(self, container, object, account=None):
+        md5 = hashlib.md5()
+        meta = {'Content-Type':'application/directory',
+                'hash':  md5.hexdigest().lower()}
+        self.backend.update_object_hashmap(account, account, container, object, 0, [], meta)   
+    
+    def create_object_by_hashmap(self, container, object, map):
+        hashmap = HashMap(self.block_size, self.block_hash)
+        for hash in map['hashes']:
+            hashmap.append(hash)
+        meta = {'hash':hexlify(hashmap.hash())}
+        size = map['bytes']
+        try:
+            args = [self.account, self.account, container, object, size,  map['hashes'], meta]
+            self.backend.update_object_hashmap(*args)
+        except IndexError, ie:
+            fault = Fault(ie.data, 409)
+            raise fault
+    
+    def create_object(self, container, object, f):
+        hashmap = HashMap(self.block_size, self.block_hash)
+        hashmap.load(f)
+        map =  [hexlify(x) for x in hashmap]
+        meta = {'hash':hashmap.hash()}
+        size = hashmap.size
+        self.backend.update_object_hashmap(self.account, self.account, container, object, size,  hashmap, meta)
+    
+    def retrieve_container_metadata(self, container):
+        return {'x-container-block-size':self.block_size,
+                'x-container-block-hash':self.block_hash}
+    
+if __name__ == "__main__":
+    db = ''
+    
+    basepath = options = getattr(settings, 'PROJECT_PATH', None)
+    params = {'db':db,
+              'path':os.path.join(basepath, 'data/pithos/'),
+              'block_size':(4 * 1024 * 1024),
+              'hash_algorithm':'sha256'}
+    
+    ot = ObjectMigration(**params)
+    ot.create_default_containers()
+    ot.create_objects()
+    
+    
\ No newline at end of file
diff --git a/tools/migrate_users b/tools/migrate_users
new file mode 100755 (executable)
index 0000000..78091f0
--- /dev/null
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+# Copyright 2011 GRNET S.A. All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or
+# without modification, are permitted provided that the following
+# conditions are met:
+# 
+#   1. Redistributions of source code must retain the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer.
+# 
+#   2. Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+# 
+# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+# 
+# The views and conclusions contained in the software and
+# documentation are those of the authors and should not be
+# interpreted as representing official policies, either expressed
+# or implied, of GRNET S.A.
+
+from lib.migrate import Migration
+
+from sqlalchemy import Table
+from pithos.aai.models import PithosUser
+
+import base64
+
+class UserMigration(Migration):
+    def __init__(self, db):
+        Migration.__init__(self, db)
+        self.gss_users = Table('gss_user', self.metadata, autoload=True)
+    
+    def execute(self):
+        s = self.gss_users.select()
+        users = self.conn.execute(s).fetchall()
+        l = []
+        for u in users:
+            user = PithosUser()
+            user.pk = u['id']
+            user.uniq = u['username']
+            user.realname = u['name']
+            user.is_admin = False
+            user.affiliation = u['homeorganization'] if u['homeorganization'] else ''
+            user.auth_token = base64.b64encode(u['authtoken'])
+            user.auth_token_created = u['creationdate']
+            user.auth_token_expires = u['authtokenexpirydate']
+            user.created = u['creationdate']
+            user.updated = u['modificationdate']
+            print '#', user
+            user.save(update_timestamps=False)
+
+if __name__ == "__main__":
+    db = 'postgresql://gss:m0ust@rda@62.217.112.56/pithos'
+    m = UserMigration(db)
+    m.execute()
\ No newline at end of file
diff --git a/tools/migration b/tools/migration
deleted file mode 100755 (executable)
index 7570b75..0000000
+++ /dev/null
@@ -1,186 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2011 GRNET S.A. All rights reserved.
-# 
-# Redistribution and use in source and binary forms, with or
-# without modification, are permitted provided that the following
-# conditions are met:
-# 
-#   1. Redistributions of source code must retain the above
-#      copyright notice, this list of conditions and the following
-#      disclaimer.
-# 
-#   2. Redistributions in binary form must reproduce the above
-#      copyright notice, this list of conditions and the following
-#      disclaimer in the documentation and/or other materials
-#      provided with the distribution.
-# 
-# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
-# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-# 
-# The views and conclusions contained in the software and
-# documentation are those of the authors and should not be
-# interpreted as representing official policies, either expressed
-# or implied, of GRNET S.A.
-
-from sqlalchemy import create_engine
-from sqlalchemy import Table, MetaData
-from sqlalchemy.sql import select
-
-from pithos.api.util import hashmap_hash, get_container_headers
-from pithos.backends.lib.hashfiler import Blocker, Mapper
-from pithos.aai.models import PithosUser
-
-from django.conf import settings
-
-from pithos.backends.modular import ModularBackend
-
-import json
-import base64
-import os
-
-class Migration(object):
-    def __init__(self, db):
-        self.engine = create_engine(db)
-        self.metadata = MetaData(self.engine)
-        #self.engine.echo = True
-        self.conn = self.engine.connect()
-    
-    def execute(self):
-        pass
-    
-class UserMigration(Migration):
-    def __init__(self, db):
-        Migration.__init__(self, db)
-        self.gss_users = Table('gss_user', self.metadata, autoload=True)
-    
-    def execute(self):
-        s = self.gss_users.select()
-        users = self.conn.execute(s).fetchall()
-        l = []
-        for u in users:
-            user = PithosUser()
-            user.pk = u['id']
-            user.uniq = u['username']
-            user.realname = u['name']
-            user.is_admin = False
-            user.affiliation = u['homeorganization'] if u['homeorganization'] else ''
-            user.auth_token = base64.b64encode(u['authtoken'])
-            user.auth_token_created = u['creationdate']
-            user.auth_token_expires = u['authtokenexpirydate']
-            user.created = u['creationdate']
-            user.updated = u['modificationdate']
-            print '#', user
-            user.save(update_timestamps=False)
-    
-class DataMigration(Migration):
-    def __init__(self, db, path, block_size, hash_algorithm):
-        Migration.__init__(self, db)
-        params = {'blocksize': block_size,
-                  'blockpath': os.path.join(path + '/blocks'),
-                  'hashtype': hash_algorithm}
-        self.blocker = Blocker(**params)
-        
-        params = {'mappath': os.path.join(path + '/maps'),
-                  'namelen': self.blocker.hashlen}
-        self.mapper = Mapper(**params)
-    
-    def execute(self):
-        filebody = Table('filebody', self.metadata, autoload=True)
-        s = select([filebody.c.id, filebody.c.storedfilepath])
-        rp = self.conn.execute(s)
-        
-        while True:
-            t = rp.fetchone()
-            if not t:
-                break
-            id, path = t
-            print '#', id, path
-            hashlist = self.blocker.block_stor_file(open(path))[1]
-            self.mapper.map_stor(id, hashlist)
-        rp.close()
-
-class ObjectMigration(DataMigration):
-    def __init__(self, db, path, block_size, hash_algorithm):
-        DataMigration.__init__(self, db, path, block_size, hash_algorithm)
-        options = getattr(settings, 'BACKEND', None)[1]
-        self.backend = ModularBackend(*options)
-    
-    def create_default_containers(self):
-        users = PithosUser.objects.all()
-        for u in users:
-            try:
-                self.backend.put_container(u.uniq, u.uniq, 'pithos', {})
-                self.backend.put_container(u.uniq, u.uniq, 'trash', {})
-            except NameError, e:
-                pass
-    
-    def create_directory_markers(self, parent_id=None, path=None):
-        folderTable = Table('folder', self.metadata, autoload=True)
-        userTable = Table('gss_user', self.metadata, autoload=True)
-        s = select([folderTable.c.id, folderTable.c.name, userTable.c.username])
-        s = s.where(folderTable.c.parent_id == parent_id)
-        s = s.where(folderTable.c.owner_id == userTable.c.id)
-        rp = self.conn.execute(s)
-        while True:
-            t = rp.fetchone()
-            if not t:
-                path = None
-                break
-            id, name, uuniq = t[0], t[1], t[2]
-            #print id, name, uuniq
-            if parent_id:
-                obj = '%s/%s' %(path, name) if path else name
-                print '#', obj
-                self.backend.update_object_hashmap(uuniq, uuniq, 'pithos', obj, 0, [])
-            else:
-                obj = ''
-            self.create_directory_markers(id, path=obj)
-        rp.close()
-        path = None
-    
-    def execute(self):
-        filebody = Table('filebody', self.metadata, autoload=True)
-        s = select([filebody.c.id])
-        rp = self.conn.execute(s)
-        while True:
-            id = rp.fetchone()
-            if not id:
-                break
-            meta = {}
-            hashlist = self.mapper.map_retr(id)
-            #hashmap = d['hashes']
-            #size = int(d['bytes'])
-            #meta.update({'hash': hashmap_hash(request, hashmap)})
-            #version_id = backend.update_object_hashmap(request.user, v_account,
-            #                                           v_container, v_object,
-            #                                           size, hashmap)
-        rp.close()
-    
-if __name__ == "__main__":
-    db = ''
-    t = UserMigration(db)
-    t.execute()
-    
-    basepath = options = getattr(settings, 'PROJECT_PATH', None)
-    params = {'db':db,
-              'path':os.path.join(basepath, 'data/pithos/'),
-              'block_size':(4 * 1024 * 1024),
-              'hash_algorithm':'sha256'}
-    dt = DataMigration(**params)
-    dt.execute()
-    
-    ot = ObjectMigration(**params)
-    ot.create_default_containers()
-    ot.create_directory_markers()
-    
\ No newline at end of file