from binascii import hexlify
from pithos.backends.lib.hashfiler import Blocker
from pithos.aai.models import PithosUser
from django.conf import settings
-from pithos.backends.modular import ModularBackend
+from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
+from pithos.backends.lib.sqlalchemy.node import Node
+from pithos.backends.lib.sqlalchemy.dbwrapper import DBWrapper
from lib.transfer import upload
-from lib.hashmap import HashMap
+from lib.hashmap import HashMap, file_read_iterator
from lib.client import Fault
from lib.migrate import Migration
import os
import sys
import hashlib
+import mimetypes
-class ObjectMigration(DataMigration):
- def __init__(self, db, path, block_size, hash_algorithm):
- DataMigration.__init__(self, db, path, block_size, hash_algorithm)
- self.wrapper = ClientWrapper()
-
+class ObjectMigration(Migration):
+ def __init__(self, old_db):
+ Migration.__init__(self, old_db)
+ self.wrapper = ClientWrapper(self.backend)
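+        #direct access to the node layer for version bookkeeping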
+ params = {'wrapper': DBWrapper(self.backend.db)}
+ self.node = Node(**params)
+
def create_default_containers(self):
users = PithosUser.objects.all()
for u in users:
else:
return '%s/%s' %(self.get_path(parent_id), foldername)
+ def create_object(self, username, container, object, filepath, mimetype):
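+        """Upload a local file as `object` in `container` on behalf of
+        `username`, creating directory markers for the intermediate path
+        components; returns the version serial of the new object."""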
+ obj = ''
+ path = '/'.join(object.split('/')[:-1])
+ name = object.split('/')[-1]
+        #create directory markers for the intermediate folders
+        if path:
+            for f in path.split('/'):
+                obj = '%s/%s' %(obj, f) if obj else f
+                try:
+                    self.wrapper.create_directory_marker('pithos', obj, username)
+                except NameError, e:
+                    pass
+ self.wrapper.set_account(username)
+
+ prefix = '%s/' %path if path else ''
+ print '#', filepath, container, prefix, name, mimetype
+ return upload(self.wrapper, filepath, container, prefix, name, mimetype)
+
+ def create_history(self, user, header_id, node_id, deleted=False):
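+        """Replay a file's old versions in the backend: all but the latest
+        land in CLUSTER_HISTORY, the latest in CLUSTER_NORMAL, or every
+        one in CLUSTER_DELETED if the file was deleted."""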
+ filebody = Table('filebody', self.metadata, autoload=True)
+ gss_user = Table('gss_user', self.metadata, autoload=True)
+ j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
+ s = select([filebody.c.filesize, gss_user.c.username], from_obj=j)
+ s = s.where(filebody.c.header_id == header_id)
+ s = s.order_by(filebody.c.version)
+ rp = self.conn.execute(s)
+ versions = rp.fetchall()
+ print '#', len(versions)
+ rp.close()
+        for i, (size, modified_by) in enumerate(versions):
+            cluster = CLUSTER_HISTORY if i < len(versions) - 1 else CLUSTER_NORMAL
+            cluster = cluster if not deleted else CLUSTER_DELETED
+            args = (node_id, size, None, modified_by, cluster)
+            self.node.version_create(*args)
+
def create_objects(self):
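+        """Migrate every current file from the old database: upload the
+        data, rebuild the version history, then drop the temporary
+        version created by the upload."""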
fileheader = Table('fileheader', self.metadata, autoload=True)
filebody = Table('filebody', self.metadata, autoload=True)
folder = Table('folder', self.metadata, autoload=True)
gss_user = Table('gss_user', self.metadata, autoload=True)
- j = filebody.join(fileheader, filebody.c.header_id == fileheader.c.id)
+ j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
j = j.join(folder, fileheader.c.folder_id == folder.c.id)
j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
- s = select([gss_user.c.username, fileheader.c.folder_id, fileheader.c.name,
- filebody.c.storedfilepath], from_obj=j)
+ s = select([gss_user.c.username, fileheader.c.id, fileheader.c.folder_id,
+ fileheader.c.name, fileheader.c.deleted, filebody.c.storedfilepath,
+ filebody.c.mimetype], from_obj=j)
rp = self.conn.execute(s)
objects = rp.fetchall()
- for username, folderid, filename, filepath in objects:
+ for username, headerid, folderid, filename, deleted, filepath, mimetype in objects:
path = self.get_path(folderid)[1:]
- obj = ''
- #create directory markers
- for f in path.split('/'):
- obj = '%s/%s' %(obj, f) if obj else f
- try:
- self.wrapper.create_directory_marker('pithos', obj, username)
- except NameError, e:
- pass
- self.wrapper.set_account(username)
-
- print '#', username, path, filename
- prefix = '%s/' %path if path else ''
- upload(self.wrapper, filepath, 'pithos', prefix, filename)
+ container = 'pithos' if not deleted else 'trash'
+ object = '%s/%s' %(path, filename)
+ vserial = self.create_object(username, container, object, filepath, mimetype)
+ nodeid = self.node.version_get_properties(vserial, keys=('node',))[0]
+ self.create_history(username, headerid, nodeid, deleted)
+ self.node.version_remove(vserial)
+ #self.set_metadata()
+ #self.set_public()
+ #self.statistics()
+ #self.set_permissions()
+
+ def handle_deleted(self):
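+        #stub: deleted files are currently routed to the 'trash'
+        #container by create_objects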
+ pass
+
+ def upload_dir(self, dir, prefix, user, container):
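+        """Recursively upload a local directory tree, stripping `prefix`
+        from each file path to derive the object name."""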
+        for f in os.listdir(dir):
+            fullpath = '%s/%s' %(dir, f)
+            if os.path.isfile(fullpath):
+                mimetype = mimetypes.guess_type(fullpath)[0]
+                name = '/'.join(fullpath.split(prefix)[1:])
+                print '@', user, container, name, fullpath, mimetype
+                self.create_object(user, container, name, fullpath, mimetype)
+            else:
+                self.upload_dir(fullpath, prefix, user, container)
class ClientWrapper(object):
"""Wraps client methods used by transfer.upload()
to ModularBackend methods"""
- def __init__(self):
- options = getattr(settings, 'BACKEND', None)[1]
- self.backend = ModularBackend(*options)
+ def __init__(self, backend):
+ self.backend = backend
self.block_size = self.backend.block_size
self.block_hash = self.backend.hash_algorithm
'hash': md5.hexdigest().lower()}
self.backend.update_object_hashmap(account, account, container, object, 0, [], meta)
- def create_object_by_hashmap(self, container, object, map):
+ def create_object_by_hashmap(self, container, object, map, mimetype=None):
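+        """Register an object from a map of block hashes and return the
+        version serial produced by update_object_hashmap."""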
hashmap = HashMap(self.block_size, self.block_hash)
- for hash in map['hashes']:
- hashmap.append(hash)
+ for h in map['hashes']:
+ hashmap.append(h)
meta = {'hash':hexlify(hashmap.hash())}
+ if mimetype:
+ meta['content-type'] = mimetype
size = map['bytes']
try:
args = [self.account, self.account, container, object, size, map['hashes'], meta]
- self.backend.update_object_hashmap(*args)
+ return self.backend.update_object_hashmap(*args)
except IndexError, ie:
fault = Fault(ie.data, 409)
raise fault
- def create_object(self, container, object, f):
- hashmap = HashMap(self.block_size, self.block_hash)
- hashmap.load(f)
- map = [hexlify(x) for x in hashmap]
- meta = {'hash':hashmap.hash()}
- size = hashmap.size
- self.backend.update_object_hashmap(self.account, self.account, container, object, size, hashmap, meta)
+ def update_container_data(self, container, f):
+ #just put the blocks
+ for block in file_read_iterator(f, self.block_size):
+ self.backend.put_block(block)
def retrieve_container_metadata(self, container):
return {'x-container-block-size':self.block_size,
'x-container-block-hash':self.block_hash}
-
+
if __name__ == "__main__":
-    db = ''
-    basepath = options = getattr(settings, 'PROJECT_PATH', None)
-    params = {'db':db,
-              'path':os.path.join(basepath, 'data/pithos/'),
-              'block_size':(4 * 1024 * 1024),
-              'hash_algorithm':'sha256'}
-    ot = ObjectMigration(**params)
-    ot.create_default_containers()
-    ot.create_objects()
+    old_db = ''
+    ot = ObjectMigration(old_db)
+    #ot.create_default_containers()
+    #ot.create_objects()
+    p = ''
+    ot.upload_dir(p, p, 'chstath', 'linux')
\ No newline at end of file