#!/usr/bin/env python

# Copyright 2011 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
#   1. Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer.
#
#   2. Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.

from binascii import hexlify

from sqlalchemy import create_engine
from sqlalchemy import Table, Column, String, MetaData
from sqlalchemy.sql import select

from pithos import settings
from pithos.backends.modular import ModularBackend

from lib.hashmap import HashMap
from lib.migrate import Migration


class DataMigration(Migration):
    """Migrate file data into the block backend.

    Maintains a local 'files' cache table mapping each stored file path
    to the hash of its hashmap, so that files whose data has already
    been migrated are skipped on subsequent runs.
    """

    def __init__(self, db):
        Migration.__init__(self, db)
        # XXX Need more columns for primary key - last modified timestamp...
        columns = []
        columns.append(Column('path', String(2048), primary_key=True))
        columns.append(Column('hash', String(255)))
        self.files = Table('files', self.metadata, *columns)
        self.metadata.create_all(self.engine)

    def cache_put(self, path, hash):
        # Insert or replace: delete any existing row for the path first.
        s = self.files.delete().where(self.files.c.path == path)
        r = self.conn.execute(s)
        r.close()
        s = self.files.insert()
        r = self.conn.execute(s, {'path': path, 'hash': hash})
        r.close()

    def cache_get(self, path):
        # Return the cached hashmap hash for the path, or None if absent.
        s = select([self.files.c.hash], self.files.c.path == path)
        r = self.conn.execute(s)
        row = r.fetchone()
        r.close()
        if not row:
            return None
        return row[0]

    def execute(self):
        blocksize = self.backend.block_size
        blockhash = self.backend.hash_algorithm

        # Loop over all available files.
        filebody = Table('filebody', self.metadata, autoload=True)
        s = select([filebody.c.storedfilepath])
        rp = self.conn.execute(s)
        paths = rp.fetchall()
        rp.close()

        # fetchall() returns one-element rows; unpack the path from each.
        for (path,) in paths:
            hashmap = HashMap(blocksize, blockhash)
            hashmap.load(path)
            hash = hexlify(hashmap.hash())

            if hash != self.cache_get(path):
                # Ask the backend which of the file's blocks it lacks.
                missing = self.backend.blocker.block_ping(hashmap)  # XXX Backend hack...
                status = '[>] ' + path
                if missing:
                    status += ' - %d block(s) missing' % len(missing)
                    with open(path) as fp:
                        for h in missing:
                            # Read each missing block from its offset in
                            # the source file and store it in the backend.
                            offset = hashmap.index(h) * blocksize
                            fp.seek(offset)
                            block = fp.read(blocksize)
                            self.backend.put_block(block)
                else:
                    status += ' - no blocks missing'
                self.cache_put(path, hash)
            else:
                status = '[-] ' + path
            print status


if __name__ == "__main__":
    db = 'sqlite:///migrate.db'
    dt = DataMigration(db)
    dt.execute()