From: Sofia Papagiannaki
Date: Thu, 29 Sep 2011 12:15:20 +0000 (+0300)
Subject: Migration Tools: Progress II
X-Git-Tag: pithos/v0.7.8~18^2
X-Git-Url: https://code.grnet.gr/git/pithos/commitdiff_plain/98137a34874d74712e9969d564b46a5f0e4ebbe9

Migration Tools: Progress II

Refs #1171
---

diff --git a/tools/lib/migrate.py b/tools/lib/migrate.py
new file mode 100644
index 0000000..453f4a1
--- /dev/null
+++ b/tools/lib/migrate.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+# Copyright 2011 GRNET S.A. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or
+# without modification, are permitted provided that the following
+# conditions are met:
+#
+#   1. Redistributions of source code must retain the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer.
+#
+#   2. Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# The views and conclusions contained in the software and
+# documentation are those of the authors and should not be
+# interpreted as representing official policies, either expressed
+# or implied, of GRNET S.A.
+
+from sqlalchemy import create_engine
+from sqlalchemy import Table, MetaData
+
+from pithos.backends.modular import ModularBackend
+
+class Migration(object):
+    def __init__(self, db):
+        self.engine = create_engine(db)
+        self.metadata = MetaData(self.engine)
+        #self.engine.echo = True
+        self.conn = self.engine.connect()
+
+    def execute(self):
+        pass
\ No newline at end of file
diff --git a/tools/lib/transfer.py b/tools/lib/transfer.py
index cd3548b..3456fe1 100644
--- a/tools/lib/transfer.py
+++ b/tools/lib/transfer.py
@@ -32,6 +32,7 @@
 # or implied, of GRNET S.A.
 
 import os
+import types
 
 from hashmap import HashMap
 from binascii import hexlify, unhexlify
@@ -39,7 +40,7 @@ from cStringIO import StringIO
 
 from client import Fault
 
 
-def upload(client, file, container, prefix):
+def upload(client, file, container, prefix, name=None):
     meta = client.retrieve_container_metadata(container)
     blocksize = int(meta['x-container-block-size'])
@@ -50,7 +51,8 @@ def upload(client, file, container, prefix):
     hashes.load(file)
     map = {'bytes': size, 'hashes': [hexlify(x) for x in hashes]}
 
-    object = prefix + os.path.split(file)[-1]
+    objectname = name if name else os.path.split(file)[-1]
+    object = prefix + objectname
     try:
         client.create_object_by_hashmap(container, object, map)
     except Fault, fault:
@@ -59,7 +61,11 @@ def upload(client, file, container, prefix):
     else:
         return
 
-    missing = fault.data.split('\n')
+    if type(fault.data) == types.StringType:
+        missing = fault.data.split('\n')
+    elif type(fault.data) == types.ListType:
+        missing = fault.data
+
     if '' in missing:
         del missing[missing.index(''):]
 
diff --git a/tools/migrate-data b/tools/migrate-data
index 4e93d33..490d87e 100755
--- a/tools/migrate-data
+++ b/tools/migrate-data
@@ -43,20 +43,7 @@ from pithos import settings
 
 from pithos.backends.modular import ModularBackend
 from lib.hashmap import HashMap
-
-
-class Migration(object):
-    def __init__(self, db):
-        self.engine = create_engine(db)
-        self.metadata = MetaData(self.engine)
-        #self.engine.echo = True
-        self.conn = self.engine.connect()
-
-        options = getattr(settings, 'BACKEND', None)[1]
-        self.backend = ModularBackend(*options)
-
-    def execute(self):
-        pass
+from lib.migrate import Migration
 
 class DataMigration(Migration):
     def __init__(self, db):
@@ -91,7 +78,13 @@ class DataMigration(Migration):
         blockhash = self.backend.hash_algorithm
 
         # Loop for all available files.
-        for path in ['README', 'store', 'test']:
+        filebody = Table('filebody', self.metadata, autoload=True)
+        s = select([filebody.c.storedfilepath])
+        rp = self.conn.execute(s)
+        paths = rp.fetchall()
+        rp.close()
+
+        for path in paths:
             map = HashMap(blocksize, blockhash)
             map.load(path)
             hash = hexlify(map.hash())
diff --git a/tools/migrate_db b/tools/migrate_db
new file mode 100755
index 0000000..1e9a499
--- /dev/null
+++ b/tools/migrate_db
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+
+# Copyright 2011 GRNET S.A. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or
+# without modification, are permitted provided that the following
+# conditions are met:
+#
+#   1. Redistributions of source code must retain the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer.
+#
+#   2. Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# The views and conclusions contained in the software and
+# documentation are those of the authors and should not be
+# interpreted as representing official policies, either expressed
+# or implied, of GRNET S.A.
+
+from sqlalchemy import Table
+from sqlalchemy.sql import select
+
+from binascii import hexlify
+
+from pithos.backends.lib.hashfiler import Blocker
+from pithos.aai.models import PithosUser
+
+from django.conf import settings
+
+from pithos.backends.modular import ModularBackend
+
+from lib.transfer import upload
+from lib.hashmap import HashMap
+from lib.client import Fault
+from lib.migrate import Migration
+
+import json
+import os
+import sys
+import hashlib
+
+class ObjectMigration(DataMigration):
+    def __init__(self, db, path, block_size, hash_algorithm):
+        DataMigration.__init__(self, db, path, block_size, hash_algorithm)
+        self.wrapper = ClientWrapper()
+
+    def create_default_containers(self):
+        users = PithosUser.objects.all()
+        for u in users:
+            print '#', u.uniq
+            try:
+                self.wrapper.create_container('pithos', u.uniq)
+                self.wrapper.create_container('trash', u.uniq)
+            except NameError, e:
+                pass
+
+    def get_path(self, child_id):
+        folderTable = Table('folder', self.metadata, autoload=True)
+        s = select([folderTable.c.parent_id, folderTable.c.name])
+        s = s.where(folderTable.c.id == child_id)
+        rp = self.conn.execute(s)
+        parent_id, foldername = rp.fetchone()
+        if not parent_id:
+            return ''
+        else:
+            return '%s/%s' %(self.get_path(parent_id), foldername)
+
+    def create_objects(self):
+        fileheader = Table('fileheader', self.metadata, autoload=True)
+        filebody = Table('filebody', self.metadata, autoload=True)
+        folder = Table('folder', self.metadata, autoload=True)
+        gss_user = Table('gss_user', self.metadata, autoload=True)
+        j = filebody.join(fileheader, filebody.c.header_id == fileheader.c.id)
+        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
+        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
+        s = select([gss_user.c.username, fileheader.c.folder_id, fileheader.c.name,
+                    filebody.c.storedfilepath], from_obj=j)
+        rp = self.conn.execute(s)
+        objects = rp.fetchall()
+        for username, folderid, filename, filepath in objects:
+            path = self.get_path(folderid)[1:]
+            obj = ''
+            #create directory markers
+            for f in path.split('/'):
+                obj = '%s/%s' %(obj, f) if obj else f
+                try:
+                    self.wrapper.create_directory_marker('pithos', obj, username)
+                except NameError, e:
+                    pass
+            self.wrapper.set_account(username)
+
+            print '#', username, path, filename
+            prefix = '%s/' %path if path else ''
+            upload(self.wrapper, filepath, 'pithos', prefix, filename)
+
+class ClientWrapper(object):
+    """Wraps client methods used by transfer.upload()
+    to ModularBackend methods"""
+
+    def __init__(self):
+        options = getattr(settings, 'BACKEND', None)[1]
+        self.backend = ModularBackend(*options)
+        self.block_size = self.backend.block_size
+        self.block_hash = self.backend.hash_algorithm
+
+    def set_account(self, account):
+        self.account = account
+
+    def create_container(self, container, account=None, **meta):
+        self.backend.put_container(account, account, container, meta)
+
+    def create_directory_marker(self, container, object, account=None):
+        md5 = hashlib.md5()
+        meta = {'Content-Type':'application/directory',
+                'hash': md5.hexdigest().lower()}
+        self.backend.update_object_hashmap(account, account, container, object, 0, [], meta)
+
+    def create_object_by_hashmap(self, container, object, map):
+        hashmap = HashMap(self.block_size, self.block_hash)
+        for hash in map['hashes']:
+            hashmap.append(hash)
+        meta = {'hash':hexlify(hashmap.hash())}
+        size = map['bytes']
+        try:
+            args = [self.account, self.account, container, object, size, map['hashes'], meta]
+            self.backend.update_object_hashmap(*args)
+        except IndexError, ie:
+            fault = Fault(ie.data, 409)
+            raise fault
+
+    def create_object(self, container, object, f):
+        hashmap = HashMap(self.block_size, self.block_hash)
+        hashmap.load(f)
+        map = [hexlify(x) for x in hashmap]
+        meta = {'hash':hashmap.hash()}
+        size = hashmap.size
+        self.backend.update_object_hashmap(self.account, self.account, container, object, size, hashmap, meta)
+
+    def retrieve_container_metadata(self, container):
+        return {'x-container-block-size':self.block_size,
+                'x-container-block-hash':self.block_hash}
+
+if __name__ == "__main__":
+    db = ''
+
+    basepath = options = getattr(settings, 'PROJECT_PATH', None)
+    params = {'db':db,
+              'path':os.path.join(basepath, 'data/pithos/'),
+              'block_size':(4 * 1024 * 1024),
+              'hash_algorithm':'sha256'}
+
+    ot = ObjectMigration(**params)
+    ot.create_default_containers()
+    ot.create_objects()
+
+
\ No newline at end of file
diff --git a/tools/migrate_users b/tools/migrate_users
new file mode 100755
index 0000000..78091f0
--- /dev/null
+++ b/tools/migrate_users
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+# Copyright 2011 GRNET S.A. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or
+# without modification, are permitted provided that the following
+# conditions are met:
+#
+#   1. Redistributions of source code must retain the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer.
+#
+#   2. Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials
+#      provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# The views and conclusions contained in the software and
+# documentation are those of the authors and should not be
+# interpreted as representing official policies, either expressed
+# or implied, of GRNET S.A.
+
+from lib.migrate import Migration
+
+from sqlalchemy import Table
+from pithos.aai.models import PithosUser
+
+import base64
+
+class UserMigration(Migration):
+    def __init__(self, db):
+        Migration.__init__(self, db)
+        self.gss_users = Table('gss_user', self.metadata, autoload=True)
+
+    def execute(self):
+        s = self.gss_users.select()
+        users = self.conn.execute(s).fetchall()
+        l = []
+        for u in users:
+            user = PithosUser()
+            user.pk = u['id']
+            user.uniq = u['username']
+            user.realname = u['name']
+            user.is_admin = False
+            user.affiliation = u['homeorganization'] if u['homeorganization'] else ''
+            user.auth_token = base64.b64encode(u['authtoken'])
+            user.auth_token_created = u['creationdate']
+            user.auth_token_expires = u['authtokenexpirydate']
+            user.created = u['creationdate']
+            user.updated = u['modificationdate']
+            print '#', user
+            user.save(update_timestamps=False)
+
+if __name__ == "__main__":
+    db = 'postgresql://gss:m0ust@rda@62.217.112.56/pithos'
+    m = UserMigration(db)
+    m.execute()
\ No newline at end of file
diff --git a/tools/migration b/tools/migration
deleted file mode 100755
index 7570b75..0000000
--- a/tools/migration
+++ /dev/null
@@ -1,186 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2011 GRNET S.A. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or
-# without modification, are permitted provided that the following
-# conditions are met:
-#
-#   1. Redistributions of source code must retain the above
-#      copyright notice, this list of conditions and the following
-#      disclaimer.
-#
-#   2. Redistributions in binary form must reproduce the above
-#      copyright notice, this list of conditions and the following
-#      disclaimer in the documentation and/or other materials
-#      provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
-# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-#
-# The views and conclusions contained in the software and
-# documentation are those of the authors and should not be
-# interpreted as representing official policies, either expressed
-# or implied, of GRNET S.A.
-
-from sqlalchemy import create_engine
-from sqlalchemy import Table, MetaData
-from sqlalchemy.sql import select
-
-from pithos.api.util import hashmap_hash, get_container_headers
-from pithos.backends.lib.hashfiler import Blocker, Mapper
-from pithos.aai.models import PithosUser
-
-from django.conf import settings
-
-from pithos.backends.modular import ModularBackend
-
-import json
-import base64
-import os
-
-class Migration(object):
-    def __init__(self, db):
-        self.engine = create_engine(db)
-        self.metadata = MetaData(self.engine)
-        #self.engine.echo = True
-        self.conn = self.engine.connect()
-
-    def execute(self):
-        pass
-
-class UserMigration(Migration):
-    def __init__(self, db):
-        Migration.__init__(self, db)
-        self.gss_users = Table('gss_user', self.metadata, autoload=True)
-
-    def execute(self):
-        s = self.gss_users.select()
-        users = self.conn.execute(s).fetchall()
-        l = []
-        for u in users:
-            user = PithosUser()
-            user.pk = u['id']
-            user.uniq = u['username']
-            user.realname = u['name']
-            user.is_admin = False
-            user.affiliation = u['homeorganization'] if u['homeorganization'] else ''
-            user.auth_token = base64.b64encode(u['authtoken'])
-            user.auth_token_created = u['creationdate']
-            user.auth_token_expires = u['authtokenexpirydate']
-            user.created = u['creationdate']
-            user.updated = u['modificationdate']
-            print '#', user
-            user.save(update_timestamps=False)
-
-class DataMigration(Migration):
-    def __init__(self, db, path, block_size, hash_algorithm):
-        Migration.__init__(self, db)
-        params = {'blocksize': block_size,
-                  'blockpath': os.path.join(path + '/blocks'),
-                  'hashtype': hash_algorithm}
-        self.blocker = Blocker(**params)
-
-        params = {'mappath': os.path.join(path + '/maps'),
-                  'namelen': self.blocker.hashlen}
-        self.mapper = Mapper(**params)
-
-    def execute(self):
-        filebody = Table('filebody', self.metadata, autoload=True)
-        s = select([filebody.c.id, filebody.c.storedfilepath])
-        rp = self.conn.execute(s)
-
-        while True:
-            t = rp.fetchone()
-            if not t:
-                break
-            id, path = t
-            print '#', id, path
-            hashlist = self.blocker.block_stor_file(open(path))[1]
-            self.mapper.map_stor(id, hashlist)
-        rp.close()
-
-class ObjectMigration(DataMigration):
-    def __init__(self, db, path, block_size, hash_algorithm):
-        DataMigration.__init__(self, db, path, block_size, hash_algorithm)
-        options = getattr(settings, 'BACKEND', None)[1]
-        self.backend = ModularBackend(*options)
-
-    def create_default_containers(self):
-        users = PithosUser.objects.all()
-        for u in users:
-            try:
-                self.backend.put_container(u.uniq, u.uniq, 'pithos', {})
-                self.backend.put_container(u.uniq, u.uniq, 'trash', {})
-            except NameError, e:
-                pass
-
-    def create_directory_markers(self, parent_id=None, path=None):
-        folderTable = Table('folder', self.metadata, autoload=True)
-        userTable = Table('gss_user', self.metadata, autoload=True)
-        s = select([folderTable.c.id, folderTable.c.name, userTable.c.username])
-        s = s.where(folderTable.c.parent_id == parent_id)
-        s = s.where(folderTable.c.owner_id == userTable.c.id)
-        rp = self.conn.execute(s)
-        while True:
-            t = rp.fetchone()
-            if not t:
-                path = None
-                break
-            id, name, uuniq = t[0], t[1], t[2]
-            #print id, name, uuniq
-            if parent_id:
-                obj = '%s/%s' %(path, name) if path else name
-                print '#', obj
-                self.backend.update_object_hashmap(uuniq, uuniq, 'pithos', obj, 0, [])
-            else:
-                obj = ''
-            self.create_directory_markers(id, path=obj)
-        rp.close()
-        path = None
-
-    def execute(self):
-        filebody = Table('filebody', self.metadata, autoload=True)
-        s = select([filebody.c.id])
-        rp = self.conn.execute(s)
-        while True:
-            id = rp.fetchone()
-            if not id:
-                break
-            meta = {}
-            hashlist = self.mapper.map_retr(id)
-            #hashmap = d['hashes']
-            #size = int(d['bytes'])
-            #meta.update({'hash': hashmap_hash(request, hashmap)})
-            #version_id = backend.update_object_hashmap(request.user, v_account,
-            #                                           v_container, v_object,
-            #                                           size, hashmap)
-        rp.close()
-
-if __name__ == "__main__":
-    db = ''
-    t = UserMigration(db)
-    t.execute()
-
-    basepath = options = getattr(settings, 'PROJECT_PATH', None)
-    params = {'db':db,
-              'path':os.path.join(basepath, 'data/pithos/'),
-              'block_size':(4 * 1024 * 1024),
-              'hash_algorithm':'sha256'}
-    dt = DataMigration(**params)
-    dt.execute()
-
-    ot = ObjectMigration(**params)
-    ot.create_default_containers()
-    ot.create_directory_markers()
-
\ No newline at end of file
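
The patch above factors the shared Migration base class out into tools/lib/migrate.py, and the individual scripts (migrate_users, migrate-data, migrate_db) now import it with `from lib.migrate import Migration`. The sketch below is not part of the commit; the subclass name, table, and connection string are illustrative placeholders. It only shows how a new migration script would plug into that base class, following the same reflect-a-table-and-iterate pattern UserMigration uses above, assuming it is run from the tools/ directory against the legacy GSS database:

    #!/usr/bin/env python
    # Illustrative sketch only -- not part of this commit.

    from sqlalchemy import Table
    from sqlalchemy.sql import select

    from lib.migrate import Migration        # base class added by this patch

    class FolderListing(Migration):          # hypothetical example subclass
        def __init__(self, db):
            Migration.__init__(self, db)     # sets up engine, metadata, conn
            self.folders = Table('folder', self.metadata, autoload=True)

        def execute(self):
            # Reflect the legacy table and iterate over its rows, the same
            # pattern the migration scripts in this patch follow.
            s = select([self.folders.c.id, self.folders.c.name])
            for id, name in self.conn.execute(s).fetchall():
                print '#', id, name

    if __name__ == "__main__":
        db = 'postgresql://user:password@host/gss'   # placeholder URL
        FolderListing(db).execute()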