X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/cb91d46e65af89484bf1f017e39730594fed836e..de47cf8ff14cf9ad034c7be97ab8feb2a427f52a:/lib/backend.py

diff --git a/lib/backend.py b/lib/backend.py
index 46b1047..1cea6e3 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#
 #

 # Copyright (C) 2006, 2007 Google Inc.
@@ -79,50 +79,36 @@ def StopMaster():
   return True


-def AddNode(dsa, dsapub, rsa, rsapub, ssh, sshpub):
-  """ adds the node to the cluster
-    - updates the hostkey
-    - adds the ssh-key
-    - sets the node id
-    - sets the node status to installed
-  """
-
-  f = open("/etc/ssh/ssh_host_rsa_key", 'w')
-  f.write(rsa)
-  f.close()
-
-  f = open("/etc/ssh/ssh_host_rsa_key.pub", 'w')
-  f.write(rsapub)
-  f.close()
-
-  f = open("/etc/ssh/ssh_host_dsa_key", 'w')
-  f.write(dsa)
-  f.close()
+def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
+  """Joins this node to the cluster.

-  f = open("/etc/ssh/ssh_host_dsa_key.pub", 'w')
-  f.write(dsapub)
-  f.close()
+  This does the following:
+      - updates the hostkeys of the machine (rsa and dsa)
+      - adds the ssh private key to the user
+      - adds the ssh public key to the users' authorized_keys file

-  if not os.path.isdir("/root/.ssh"):
-    os.mkdir("/root/.ssh")
+  """
+  sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
+               (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
+               (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
+               (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
+  for name, content, mode in sshd_keys:
+    utils.WriteFile(name, data=content, mode=mode)

-  f = open("/root/.ssh/id_dsa", 'w')
-  f.write(ssh)
-  f.close()
+  try:
+    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
+                                                    mkdir=True)
+  except errors.OpExecError, err:
+    logger.Error("Error while processing user ssh files: %s" % err)
+    return False

-  f = open("/root/.ssh/id_dsa.pub", 'w')
-  f.write(sshpub)
-  f.close()
+  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
+    utils.WriteFile(name, data=content, mode=0600)

-  f = open('/root/.ssh/id_dsa.pub', 'r')
-  try:
-    utils.AddAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192))
-  finally:
-    f.close()
+  utils.AddAuthorizedKey(auth_keys, sshpub)

-  utils.RunCmd(["/etc/init.d/ssh", "restart"])
+  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])

-  utils.RemoveFile("/root/.ssh/known_hosts")
   return True


@@ -130,25 +116,30 @@ def LeaveCluster():
   """Cleans up the current node and prepares it to be removed from the cluster.
""" - if os.path.exists(constants.DATA_DIR): - for dirpath, dirnames, filenames in os.walk(constants.DATA_DIR): - if dirpath == constants.DATA_DIR: - for i in filenames: - os.unlink(os.path.join(dirpath, i)) + if os.path.isdir(constants.DATA_DIR): + for rel_name in utils.ListVisibleFiles(constants.DATA_DIR): + full_name = os.path.join(constants.DATA_DIR, rel_name) + if os.path.isfile(full_name) and not os.path.islink(full_name): + utils.RemoveFile(full_name) + + try: + priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS) + except errors.OpExecError, err: + logger.Error("Error while processing ssh files: %s" % err) + return - f = open('/root/.ssh/id_dsa.pub', 'r') + f = open(pub_key, 'r') try: - utils.RemoveAuthorizedKey('/root/.ssh/authorized_keys', f.read(8192)) + utils.RemoveAuthorizedKey(auth_keys, f.read(8192)) finally: f.close() - utils.RemoveFile('/root/.ssh/id_dsa') - utils.RemoveFile('/root/.ssh/id_dsa.pub') + utils.RemoveFile(priv_key) + utils.RemoveFile(pub_key) def GetNodeInfo(vgname): - """ gives back a hash with different informations - about the node + """Gives back a hash with different informations about the node. Returns: { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx, @@ -159,8 +150,8 @@ def GetNodeInfo(vgname): memory_dom0 is the memory allocated for domain0 in MiB memory_free is the currently available (free) ram in MiB memory_total is the total number of ram in MiB - """ + """ outputarray = {} vginfo = _GetVGInfo(vgname) outputarray['vg_size'] = vginfo['vg_size'] @@ -171,6 +162,12 @@ def GetNodeInfo(vgname): if hyp_info is not None: outputarray.update(hyp_info) + f = open("/proc/sys/kernel/random/boot_id", 'r') + try: + outputarray["bootid"] = f.read(128).rstrip("\n") + finally: + f.close() + return outputarray @@ -192,7 +189,6 @@ def VerifyNode(what): by ssh-execution of 'hostname', result compared against name in list. """ - result = {} if 'hypervisor' in what: @@ -214,23 +210,35 @@ def GetVolumeList(vg_name): """Compute list of logical volumes and their size. Returns: - dictionary of all partions (key) with their size: - test1: 20.06MiB + dictionary of all partions (key) with their size (in MiB), inactive + and online status: + {'test1': ('20.06', True, True)} """ - result = utils.RunCmd(["lvs", "--noheadings", "--units=m", - "-oname,size", vg_name]) + lvs = {} + sep = '|' + result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", + "--separator=%s" % sep, + "-olv_name,lv_size,lv_attr", vg_name]) if result.failed: logger.Error("Failed to list logical volumes, lvs output: %s" % result.output) - return {} + return result.output + + for line in result.stdout.splitlines(): + line = line.strip().rstrip(sep) + name, size, attr = line.split(sep) + if len(attr) != 6: + attr = '------' + inactive = attr[4] == '-' + online = attr[5] == 'o' + lvs[name] = (size, inactive, online) - lvlist = [line.split() for line in result.output.splitlines()] - return dict(lvlist) + return lvs def ListVolumeGroups(): - """List the volume groups and their size + """List the volume groups and their size. Returns: Dictionary with keys volume name and values the size of the volume @@ -265,11 +273,11 @@ def NodeVolumes(): 'vg': line[3].strip(), } - return [map_line(line.split('|')) for line in result.output.splitlines()] + return [map_line(line.split('|')) for line in result.stdout.splitlines()] def BridgesExist(bridges_list): - """Check if a list of bridges exist on the current node + """Check if a list of bridges exist on the current node. 

   Returns:
     True if all of them exist, false otherwise

@@ -283,14 +291,14 @@ def GetInstanceList():
-  """ provides a list of instances
+  """Provides a list of instances.

   Returns:
     A list of all running instances on the current node
       - instance1.example.com
       - instance2.example.com

-  """
+  """
   try:
     names = hypervisor.GetHypervisor().ListInstances()
   except errors.HypervisorError, err:
@@ -301,8 +309,7 @@ def GetInstanceInfo(instance):
-  """ gives back the informations about an instance
-  as a dictonary
+  """Gives back the informations about an instance as a dictionary.

   Args:
     instance: name of the instance (ex. instance1.example.com)
@@ -313,8 +320,8 @@ def GetInstanceInfo(instance):
     memory: memory size of instance (int)
     state: xen state of instance (string)
     time: cpu time of instance (float)
-  """

+  """
   output = {}

   iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
@@ -341,13 +348,13 @@ def GetAllInstancesInfo():
     state: xen state of instance (string)
     time: cpu time of instance (float)
     vcpus: the number of cpus
-  """

+  """
   output = {}

   iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
   if iinfo:
-    for name, id, memory, vcpus, state, times in iinfo:
+    for name, inst_id, memory, vcpus, state, times in iinfo:
       output[name] = {
         'memory': memory,
         'vcpus': vcpus,
@@ -359,7 +366,7 @@ def GetAllInstancesInfo():


 def AddOSToInstance(instance, os_disk, swap_disk):
-  """Add an os to an instance.
+  """Add an OS to an instance.

   Args:
     instance: the instance object
@@ -371,17 +378,13 @@ def AddOSToInstance(instance, os_disk, swap_disk):

   create_script = inst_os.create_script

-  for os_device in instance.disks:
-    if os_device.iv_name == os_disk:
-      break
-  else:
+  os_device = instance.FindDisk(os_disk)
+  if os_device is None:
     logger.Error("Can't find this device-visible name '%s'" % os_disk)
     return False

-  for swap_device in instance.disks:
-    if swap_device.iv_name == swap_disk:
-      break
-  else:
+  swap_device = instance.FindDisk(swap_disk)
+  if swap_device is None:
     logger.Error("Can't find this device-visible name '%s'" % swap_disk)
     return False

@@ -402,12 +405,69 @@ def AddOSToInstance(instance, os_disk, swap_disk):
   if not os.path.exists(constants.LOG_OS_DIR):
     os.mkdir(constants.LOG_OS_DIR, 0750)

-  command = utils.BuildShellCmd("cd %s; %s -i %s -b %s -s %s &>%s",
+  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                 inst_os.path, create_script, instance.name,
                                 real_os_dev.dev_path, real_swap_dev.dev_path,
                                 logfile)

   result = utils.RunCmd(command)
+  if result.failed:
+    logger.Error("os create command '%s' returned error: %s, logfile: %s,"
+                 " output: %s" %
+                 (command, result.fail_reason, logfile, result.output))
+    return False
+
+  return True
+
+
+def RunRenameInstance(instance, old_name, os_disk, swap_disk):
+  """Run the OS rename script for an instance.
+
+  Args:
+    instance: the instance object
+    old_name: the old name of the instance
+    os_disk: the instance-visible name of the os device
+    swap_disk: the instance-visible name of the swap device
+
+  """
+  inst_os = OSFromDisk(instance.os)
+
+  script = inst_os.rename_script
+
+  os_device = instance.FindDisk(os_disk)
+  if os_device is None:
+    logger.Error("Can't find this device-visible name '%s'" % os_disk)
+    return False
+
+  swap_device = instance.FindDisk(swap_disk)
+  if swap_device is None:
+    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
+    return False
+
+  real_os_dev = _RecursiveFindBD(os_device)
+  if real_os_dev is None:
+    raise errors.BlockDeviceError("Block device '%s' is not set up" %
+                                  str(os_device))
+  real_os_dev.Open()
+
+  real_swap_dev = _RecursiveFindBD(swap_device)
+  if real_swap_dev is None:
+    raise errors.BlockDeviceError("Block device '%s' is not set up" %
+                                  str(swap_device))
+  real_swap_dev.Open()
+
+  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
+                                           old_name,
+                                           instance.name, int(time.time()))
+  if not os.path.exists(constants.LOG_OS_DIR):
+    os.mkdir(constants.LOG_OS_DIR, 0750)
+
+  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
+                                inst_os.path, script, old_name, instance.name,
+                                real_os_dev.dev_path, real_swap_dev.dev_path,
+                                logfile)
+
+  result = utils.RunCmd(command)
   if result.failed:
     logger.Error("os create command '%s' returned error: %s"
@@ -431,20 +491,32 @@ def _GetVGInfo(vg_name):
     vg_free is the free size of the volume group in MiB
     pv_count are the number of physical disks in that vg

+  If an error occurs during gathering of data, we return the same dict
+  with keys all set to None.
+
   """
+  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
+
   retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                          "--nosuffix", "--units=m", "--separator=:", vg_name])

   if retval.failed:
     errmsg = "volume group %s not present" % vg_name
     logger.Error(errmsg)
-    raise errors.LVMError(errmsg)
-  valarr = retval.stdout.strip().split(':')
-  retdic = {
-    "vg_size": int(round(float(valarr[0]), 0)),
-    "vg_free": int(round(float(valarr[1]), 0)),
-    "pv_count": int(valarr[2]),
-    }
+    return retdic
+  valarr = retval.stdout.strip().rstrip(':').split(':')
+  if len(valarr) == 3:
+    try:
+      retdic = {
+        "vg_size": int(round(float(valarr[0]), 0)),
+        "vg_free": int(round(float(valarr[1]), 0)),
+        "pv_count": int(valarr[2]),
+        }
+    except ValueError, err:
+      logger.Error("Fail to parse vgs output: %s" % str(err))
+  else:
+    logger.Error("vgs output has the wrong number of fields (expected"
+                 " three): %s" % str(valarr))
   return retdic


@@ -471,8 +543,8 @@ def StartInstance(instance, extra_args):

   Args:
     instance - name of instance to start.
-  """

+  """
   running_instances = GetInstanceList()

   if instance.name in running_instances:
@@ -495,8 +567,8 @@ def ShutdownInstance(instance):

   Args:
     instance - name of instance to shutdown.
-  """

+  """
   running_instances = GetInstanceList()

   if instance.name not in running_instances:
@@ -535,7 +607,42 @@ def ShutdownInstance(instance):
   return True


-def CreateBlockDevice(disk, size, on_primary):
+def RebootInstance(instance, reboot_type, extra_args):
+  """Reboot an instance.
+
+  Args:
+    instance    - name of instance to reboot
+    reboot_type - how to reboot [soft,hard,full]
+
+  """
+  running_instances = GetInstanceList()
+
+  if instance.name not in running_instances:
+    logger.Error("Cannot reboot instance that is not running")
+    return False
+
+  hyper = hypervisor.GetHypervisor()
+  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
+    try:
+      hyper.RebootInstance(instance)
+    except errors.HypervisorError, err:
+      logger.Error("Failed to soft reboot instance: %s" % err)
+      return False
+  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
+    try:
+      ShutdownInstance(instance)
+      StartInstance(instance, extra_args)
+    except errors.HypervisorError, err:
+      logger.Error("Failed to hard reboot instance: %s" % err)
+      return False
+  else:
+    raise errors.ParameterError("reboot_type invalid")
+
+
+  return True
+
+
+def CreateBlockDevice(disk, size, owner, on_primary, info):
   """Creates a block device for an instance.

   Args:
@@ -553,7 +660,7 @@ def CreateBlockDevice(disk, size, on_primary):
   clist = []
   if disk.children:
     for child in disk.children:
-      crdev = _RecursiveAssembleBD(child, on_primary)
+      crdev = _RecursiveAssembleBD(child, owner, on_primary)
       if on_primary or disk.AssembleOnSecondary():
         # we need the children open in case the device itself has to
         # be assembled
@@ -575,10 +682,19 @@ def CreateBlockDevice(disk, size, on_primary):
     raise ValueError("Can't create child device for %s, %s" %
                      (disk, size))
   if on_primary or disk.AssembleOnSecondary():
-    device.Assemble()
-    device.SetSyncSpeed(30*1024)
+    if not device.Assemble():
+      errorstring = "Can't assemble device after creation"
+      logger.Error(errorstring)
+      raise errors.BlockDeviceError("%s, very unusual event - check the node"
+                                    " daemon logs" % errorstring)
+    device.SetSyncSpeed(constants.SYNC_SPEED)
     if on_primary or disk.OpenOnSecondary():
       device.Open(force=True)
+    DevCacheManager.UpdateCache(device.dev_path, owner,
+                                on_primary, disk.iv_name)
+
+  device.SetInfo(info)
+
   physical_id = device.unique_id
   return physical_id

@@ -598,7 +714,10 @@ def RemoveBlockDevice(disk):
     logger.Info("Can't attach to device %s in remove" % disk)
     rdev = None
   if rdev is not None:
+    r_path = rdev.dev_path
     result = rdev.Remove()
+    if result:
+      DevCacheManager.RemoveCache(r_path)
   else:
     result = True
   if disk.children:
@@ -607,7 +726,7 @@ def RemoveBlockDevice(disk):
   return result


-def _RecursiveAssembleBD(disk, as_primary):
+def _RecursiveAssembleBD(disk, owner, as_primary):
   """Activate a block device for an instance.

   This is run on the primary and secondary nodes for an instance.
@@ -626,23 +745,38 @@
   """
   children = []
   if disk.children:
+    mcn = disk.ChildrenNeeded()
+    if mcn == -1:
+      mcn = 0 # max number of Nones allowed
+    else:
+      mcn = len(disk.children) - mcn # max number of Nones
     for chld_disk in disk.children:
-      children.append(_RecursiveAssembleBD(chld_disk, as_primary))
+      try:
+        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
+      except errors.BlockDeviceError, err:
+        if children.count(None) >= mcn:
+          raise
+        cdev = None
+        logger.Debug("Error in child activation: %s" % str(err))
+      children.append(cdev)

   if as_primary or disk.AssembleOnSecondary():
     r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
-    r_dev.SetSyncSpeed(30*1024)
+    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
     result = r_dev
     if as_primary or disk.OpenOnSecondary():
       r_dev.Open()
     else:
       r_dev.Close()
+    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
+                                as_primary, disk.iv_name)
+
   else:
     result = True
   return result


-def AssembleBlockDevice(disk, as_primary):
+def AssembleBlockDevice(disk, owner, as_primary):
   """Activate a block device for an instance.

   This is a wrapper over _RecursiveAssembleBD.
@@ -652,7 +786,7 @@
     True for secondary nodes

   """
-  result = _RecursiveAssembleBD(disk, as_primary)
+  result = _RecursiveAssembleBD(disk, owner, as_primary)
   if isinstance(result, bdev.BlockDev):
     result = result.dev_path
   return result

@@ -671,7 +805,10 @@ def ShutdownBlockDevice(disk):
   """
   r_dev = _RecursiveFindBD(disk)
   if r_dev is not None:
+    r_path = r_dev.dev_path
     result = r_dev.Shutdown()
+    if result:
+      DevCacheManager.RemoveCache(r_path)
   else:
     result = True
   if disk.children:
@@ -680,35 +817,45 @@ def ShutdownBlockDevice(disk):
   return result


-def MirrorAddChild(md_cdev, new_cdev):
-  """Extend an MD raid1 array.
+def MirrorAddChildren(parent_cdev, new_cdevs):
+  """Extend a mirrored block device.

   """
-  md_bdev = _RecursiveFindBD(md_cdev, allow_partial=True)
-  if md_bdev is None:
-    logger.Error("Can't find md device")
+  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
+  if parent_bdev is None:
+    logger.Error("Can't find parent device")
     return False
-  new_bdev = _RecursiveFindBD(new_cdev)
-  if new_bdev is None:
-    logger.Error("Can't find new device to add")
+  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
+  if new_bdevs.count(None) > 0:
+    logger.Error("Can't find new device(s) to add: %s:%s" %
+                 (new_bdevs, new_cdevs))
    return False
-  new_bdev.Open()
-  md_bdev.AddChild(new_bdev)
+  parent_bdev.AddChildren(new_bdevs)
   return True


-def MirrorRemoveChild(md_cdev, new_cdev):
-  """Reduce an MD raid1 array.
+def MirrorRemoveChildren(parent_cdev, new_cdevs):
+  """Shrink a mirrored block device.
""" - md_bdev = _RecursiveFindBD(md_cdev) - if md_bdev is None: - return False - new_bdev = _RecursiveFindBD(new_cdev) - if new_bdev is None: + parent_bdev = _RecursiveFindBD(parent_cdev) + if parent_bdev is None: + logger.Error("Can't find parent in remove children: %s" % parent_cdev) return False - new_bdev.Open() - md_bdev.RemoveChild(new_bdev.dev_path) + devs = [] + for disk in new_cdevs: + rpath = disk.StaticDevPath() + if rpath is None: + bd = _RecursiveFindBD(disk) + if bd is None: + logger.Error("Can't find dynamic device %s while removing children" % + disk) + return False + else: + devs.append(bd.dev_path) + else: + devs.append(rpath) + parent_bdev.RemoveChildren(devs) return True @@ -727,7 +874,7 @@ def GetMirrorStatus(disks): for dsk in disks: rbd = _RecursiveFindBD(dsk) if rbd is None: - raise errors.BlockDeviceError, "Can't find device %s" % str(dsk) + raise errors.BlockDeviceError("Can't find device %s" % str(dsk)) stats.append(rbd.CombinedSyncStatus()) return stats @@ -771,8 +918,7 @@ def FindBlockDevice(disk): rbd = _RecursiveFindBD(disk) if rbd is None: return rbd - sync_p, est_t, is_degr = rbd.GetSyncStatus() - return rbd.dev_path, rbd.major, rbd.minor, sync_p, est_t, is_degr + return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus() def UploadFile(file_name, data, mode, uid, gid, atime, mtime): @@ -787,8 +933,11 @@ def UploadFile(file_name, data, mode, uid, gid, atime, mtime): file_name) return False - allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts", - "/etc/ssh/ssh_known_hosts"] + allowed_files = [ + constants.CLUSTER_CONF_FILE, + constants.ETC_HOSTS, + constants.SSH_KNOWN_HOSTS_FILE, + ] allowed_files.extend(ssconf.SimpleStore().GetFileList()) if file_name not in allowed_files: logger.Error("Filename passed to UploadFile not in allowed" @@ -811,6 +960,7 @@ def UploadFile(file_name, data, mode, uid, gid, atime, mtime): utils.RemoveFile(new_name) return True + def _ErrnoOrStr(err): """Format an EnvironmentError exception. @@ -826,31 +976,49 @@ def _ErrnoOrStr(err): return detail -def _OSOndiskVersion(name, os_dir=None): - """Compute and return the api version of a given OS. +def _OSSearch(name, search_path=None): + """Search for OSes with the given name in the search_path. - This function will try to read the api version of the os given by - the 'name' parameter. By default, it wil use the constants.OS_DIR - as top-level directory for OSes, but this can be overriden by the - use of the os_dir parameter. Return value will be either an - integer denoting the version or None in the case when this is not - a valid OS name. + Args: + name: The name of the OS to look for + search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH) + + Returns: + The base_dir the OS resides in """ - if os_dir is None: - os_dir = os.path.sep.join([constants.OS_DIR, name]) + if search_path is None: + search_path = constants.OS_SEARCH_PATH + + for dir_name in search_path: + t_os_dir = os.path.sep.join([dir_name, name]) + if os.path.isdir(t_os_dir): + return dir_name + + return None + +def _OSOndiskVersion(name, os_dir): + """Compute and return the API version of a given OS. + + This function will try to read the API version of the os given by + the 'name' parameter and residing in the 'os_dir' directory. + + Return value will be either an integer denoting the version or None in the + case when this is not a valid OS name. 
+
+  """
+  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

   try:
     st = os.stat(api_file)
   except EnvironmentError, err:
-    raise errors.InvalidOS, (name, "'ganeti_api_version' file not"
-                             " found (%s)" % _ErrnoOrStr(err))
+    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
+                           " found (%s)" % _ErrnoOrStr(err))

   if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
-    raise errors.InvalidOS, (name, "'ganeti_api_version' file is not"
-                             " a regular file")
+    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
+                           " a regular file")

   try:
     f = open(api_file)
@@ -859,51 +1027,52 @@
     finally:
       f.close()
   except EnvironmentError, err:
-    raise errors.InvalidOS, (name, "error while reading the"
-                             " API version (%s)" % _ErrnoOrStr(err))
+    raise errors.InvalidOS(name, os_dir, "error while reading the"
+                           " API version (%s)" % _ErrnoOrStr(err))

   api_version = api_version.strip()
   try:
     api_version = int(api_version)
   except (TypeError, ValueError), err:
-    raise errors.InvalidOS, (name, "API version is not integer (%s)" %
-                             str(err))
+    raise errors.InvalidOS(name, os_dir,
+                           "API version is not integer (%s)" % str(err))

   return api_version


-def DiagnoseOS(top_dir=None):
+
+def DiagnoseOS(top_dirs=None):
   """Compute the validity for all OSes.

-  For each name in the give top_dir parameter (if not given, defaults
-  to constants.OS_DIR), it will return an object. If this is a valid
-  os, the object will be an instance of the object.OS class. If not,
-  it will be an instance of errors.InvalidOS and this signifies that
-  this name does not correspond to a valid OS.
+  Returns an OS object for each name in all the given top directories
+  (if not given defaults to constants.OS_SEARCH_PATH)

   Returns:
-    list of objects
+    list of OS objects

   """
-  if top_dir is None:
-    top_dir = constants.OS_DIR
+  if top_dirs is None:
+    top_dirs = constants.OS_SEARCH_PATH

-  try:
-    f_names = os.listdir(top_dir)
-  except EnvironmentError, err:
-    logger.Error("Can't list the OS directory: %s" % str(err))
-    return False
   result = []
-  for name in f_names:
-    try:
-      os_inst = OSFromDisk(name, os.path.sep.join([top_dir, name]))
-      result.append(os_inst)
-    except errors.InvalidOS, err:
-      result.append(err)
+  for dir_name in top_dirs:
+    if os.path.isdir(dir_name):
+      try:
+        f_names = utils.ListVisibleFiles(dir_name)
+      except EnvironmentError, err:
+        logger.Error("Can't list the OS directory %s: %s" %
+                     (dir_name, str(err)))
+        break
+      for name in f_names:
+        try:
+          os_inst = OSFromDisk(name, base_dir=dir_name)
+          result.append(os_inst)
+        except errors.InvalidOS, err:
+          result.append(objects.OS.FromInvalidOS(err))

   return result


-def OSFromDisk(name, os_dir=None):
+def OSFromDisk(name, base_dir=None):
   """Create an OS instance from disk.

   This function will return an OS instance if the given name is a
   `errors.InvalidOS` exception, detailing why this is not a valid
   OS.

+  Args:
+    os_dir: Directory containing the OS scripts. Defaults to a search
+            in all the OS_SEARCH_PATH directories.
+
   """
-  if os_dir is None:
-    os_dir = os.path.sep.join([constants.OS_DIR, name])
+  if base_dir is None:
+    base_dir = _OSSearch(name)
+
+  if base_dir is None:
+    raise errors.InvalidOS(name, None, "OS dir not found in search path")
+
+  os_dir = os.path.sep.join([base_dir, name])

   api_version = _OSOndiskVersion(name, os_dir)

   if api_version != constants.OS_API_VERSION:
-    raise errors.InvalidOS, (name, "API version mismatch (found %s want %s)"
-                             % (api_version, constants.OS_API_VERSION))
+    raise errors.InvalidOS(name, os_dir, "API version mismatch"
+                           " (found %s want %s)"
+                           % (api_version, constants.OS_API_VERSION))

   # OS Scripts dictionary, we will populate it with the actual script names
-  os_scripts = {'create': '', 'export': '', 'import': ''}
+  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
   for script in os_scripts:
     os_scripts[script] = os.path.sep.join([os_dir, script])

@@ -930,20 +1109,23 @@
     try:
       st = os.stat(os_scripts[script])
     except EnvironmentError, err:
-      raise errors.InvalidOS, (name, "'%s' script missing (%s)" %
-                               (script, _ErrnoOrStr(err)))
+      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
+                             (script, _ErrnoOrStr(err)))

     if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
-      raise errors.InvalidOS, (name, "'%s' script not executable" % script)
+      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
+                             script)

     if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
-      raise errors.InvalidOS, (name, "'%s' is not a regular file" % script)
+      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
+                             script)

-  return objects.OS(name=name, path=os_dir,
+  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
                     create_script=os_scripts['create'],
                     export_script=os_scripts['export'],
                     import_script=os_scripts['import'],
+                    rename_script=os_scripts['rename'],
                     api_version=api_version)


@@ -958,8 +1140,8 @@
   Returns:
     a config entry for the actual lvm device snapshotted.
-  """

+  """
   if disk.children:
     if len(disk.children) == 1:
       # only one child, let's recurse on it
@@ -970,7 +1152,7 @@
         if child.size == disk.size:
           # return implies breaking the loop
           return SnapshotBlockDevice(child)
-  elif disk.dev_type == "lvm":
+  elif disk.dev_type == constants.LD_LV:
     r_dev = _RecursiveFindBD(disk)
     if r_dev is not None:
       # let's stay on the safe side and ask for the full size, for now
@@ -978,9 +1160,9 @@
     else:
       return None
   else:
-    raise errors.ProgrammerError, ("Cannot snapshot non-lvm block device"
-                                   "'%s' of type '%s'" %
-                                   (disk.unique_id, disk.dev_type))
+    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
+                                 " '%s' of type '%s'" %
+                                 (disk.unique_id, disk.dev_type))


 def ExportSnapshot(disk, dest_node, instance):
@@ -993,8 +1175,8 @@
   Returns:
     True if successful, False otherwise.
-  """
+
+  """
   inst_os = OSFromDisk(instance.os)
   export_script = inst_os.export_script

@@ -1022,13 +1204,14 @@

   comprcmd = "gzip"

-  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
-                                  " -oBatchMode=yes -oEscapeChar=none"
-                                  " %s 'mkdir -p %s; cat > %s/%s'",
-                                  dest_node, destdir, destdir, destfile)
+  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
+                                destdir, destdir, destfile)
+  remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)
+
+
   # all commands have been checked, so we're safe to combine them
-  command = '|'.join([expcmd, comprcmd, remotecmd])
+  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

   result = utils.RunCmd(command)

@@ -1050,8 +1233,8 @@

   Returns:
     False in case of error, True otherwise.
-  """

+  """
   destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
   finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

@@ -1108,7 +1291,6 @@
     A serializable config file containing the export info.

   """
-
   cff = os.path.join(dest, constants.EXPORT_CONF_FILE)

   config = objects.SerializableConfigParser()

@@ -1135,34 +1317,29 @@
     False in case of error, True otherwise.

   """
-
   inst_os = OSFromDisk(instance.os)
   import_script = inst_os.import_script

-  for os_device in instance.disks:
-    if os_device.iv_name == os_disk:
-      break
-  else:
+  os_device = instance.FindDisk(os_disk)
+  if os_device is None:
     logger.Error("Can't find this device-visible name '%s'" % os_disk)
     return False

-  for swap_device in instance.disks:
-    if swap_device.iv_name == swap_disk:
-      break
-  else:
+  swap_device = instance.FindDisk(swap_disk)
+  if swap_device is None:
     logger.Error("Can't find this device-visible name '%s'" % swap_disk)
     return False

   real_os_dev = _RecursiveFindBD(os_device)
   if real_os_dev is None:
-    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
-                                    str(os_device))
+    raise errors.BlockDeviceError("Block device '%s' is not set up" %
+                                  str(os_device))
   real_os_dev.Open()

   real_swap_dev = _RecursiveFindBD(swap_device)
   if real_swap_dev is None:
-    raise errors.BlockDeviceError, ("Block device '%s' is not set up" %
-                                    str(swap_device))
+    raise errors.BlockDeviceError("Block device '%s' is not set up" %
+                                  str(swap_device))
   real_swap_dev.Open()

   logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
@@ -1170,9 +1347,8 @@
   if not os.path.exists(constants.LOG_OS_DIR):
     os.mkdir(constants.LOG_OS_DIR, 0750)

-  remotecmd = utils.BuildShellCmd("ssh -q -oStrictHostKeyChecking=yes"
-                                  " -oBatchMode=yes -oEscapeChar=none"
-                                  " %s 'cat %s'", src_node, src_image)
+  destcmd = utils.BuildShellCmd('cat %s', src_image)
+  remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)

   comprcmd = "gunzip"
   impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)

-  command = '|'.join([remotecmd, comprcmd, impcmd])
+  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

   result = utils.RunCmd(command)

@@ -1195,9 +1371,10 @@ def ListExports():
   """Return a list of exports currently available on this machine.
+
   """
   if os.path.isdir(constants.EXPORT_DIR):
-    return os.listdir(constants.EXPORT_DIR)
+    return utils.ListVisibleFiles(constants.EXPORT_DIR)
   else:
     return []

@@ -1210,8 +1387,8 @@

   Returns:
     False in case of error, True otherwise.
-  """

+  """
   target = os.path.join(constants.EXPORT_DIR, export)

   shutil.rmtree(target)
@@ -1221,6 +1398,38 @@
   return True


+def RenameBlockDevices(devlist):
+  """Rename a list of block devices.
+
+  The devlist argument is a list of tuples (disk, new_logical,
+  new_physical). The return value will be a combined boolean result
+  (True only if all renames succeeded).
+
+  """
+  result = True
+  for disk, unique_id in devlist:
+    dev = _RecursiveFindBD(disk)
+    if dev is None:
+      result = False
+      continue
+    try:
+      old_rpath = dev.dev_path
+      dev.Rename(unique_id)
+      new_rpath = dev.dev_path
+      if old_rpath != new_rpath:
+        DevCacheManager.RemoveCache(old_rpath)
+        # FIXME: we should add the new cache information here, like:
+        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
+        # but we don't have the owner here - maybe parse from existing
+        # cache? for now, we only lose lvm data when we rename, which
+        # is less critical than DRBD or MD
+    except errors.BlockDeviceError, err:
+      logger.Error("Can't rename device '%s' to '%s': %s" %
+                   (dev, unique_id, err))
+      result = False
+  return result
+
+
 class HooksRunner(object):
   """Hook runner.

@@ -1261,7 +1470,7 @@
       fdstdin = open("/dev/null", "r")
       child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT, close_fds=True,
-                               shell=False, cwd="/",env=env)
+                               shell=False, cwd="/", env=env)
       output = ""
       try:
         output = child.stdout.read(4096)
@@ -1301,13 +1510,13 @@
     elif phase == constants.HOOKS_PHASE_POST:
       suffix = "post"
     else:
-      raise errors.ProgrammerError, ("Unknown hooks phase: '%s'" % phase)
+      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)

     rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
     dir_name = "%s/%s" % (self._BASE_DIR, subdir)
     try:
-      dir_contents = os.listdir(dir_name)
+      dir_contents = utils.ListVisibleFiles(dir_name)
     except OSError, err:
       # must log
       return rr

@@ -1330,3 +1539,62 @@
       rr.append(("%s/%s" % (subdir, relname), rrval, output))

     return rr
+
+
+class DevCacheManager(object):
+  """Simple class for managing a chache of block device information.
+
+  """
+  _DEV_PREFIX = "/dev/"
+  _ROOT_DIR = constants.BDEV_CACHE_DIR
+
+  @classmethod
+  def _ConvertPath(cls, dev_path):
+    """Converts a /dev/name path to the cache file name.
+
+    This replaces slashes with underscores and strips the /dev
+    prefix. It then returns the full path to the cache file
+
+    """
+    if dev_path.startswith(cls._DEV_PREFIX):
+      dev_path = dev_path[len(cls._DEV_PREFIX):]
+    dev_path = dev_path.replace("/", "_")
+    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
+    return fpath
+
+  @classmethod
+  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
+    """Updates the cache information for a given device.
+
+    """
+    if dev_path is None:
+      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
+      return
+    fpath = cls._ConvertPath(dev_path)
+    if on_primary:
+      state = "primary"
+    else:
+      state = "secondary"
+    if iv_name is None:
+      iv_name = "not_visible"
+    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
+    try:
+      utils.WriteFile(fpath, data=fdata)
+    except EnvironmentError, err:
+      logger.Error("Can't update bdev cache for %s, error %s" %
+                   (dev_path, str(err)))
+
+  @classmethod
+  def RemoveCache(cls, dev_path):
+    """Remove data for a dev_path.
+
+    """
+    if dev_path is None:
+      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
+      return
+    fpath = cls._ConvertPath(dev_path)
+    try:
+      utils.RemoveFile(fpath)
+    except EnvironmentError, err:
+      logger.Error("Can't update bdev cache for %s, error %s" %
+                   (dev_path, str(err)))
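
As an illustration of the DevCacheManager behaviour introduced at the end of this diff, the standalone sketch below replays the cache-file naming and the one-line cache format outside of Ganeti. It is not part of the patch: the cache directory ("/tmp/bdev-cache") and the example device path are invented stand-ins for constants.BDEV_CACHE_DIR and a real block device, and the helper function names are hypothetical.

# Standalone sketch of the DevCacheManager cache naming shown above.
# CACHE_DIR and the example device path are invented stand-ins; the real
# code uses constants.BDEV_CACHE_DIR and actual block device paths.
import os

CACHE_DIR = "/tmp/bdev-cache"   # stand-in for constants.BDEV_CACHE_DIR
DEV_PREFIX = "/dev/"

def convert_path(dev_path):
  """Mirror _ConvertPath: strip the /dev/ prefix, turn slashes into '_'."""
  if dev_path.startswith(DEV_PREFIX):
    dev_path = dev_path[len(DEV_PREFIX):]
  return "%s/bdev_%s" % (CACHE_DIR, dev_path.replace("/", "_"))

def update_cache(dev_path, owner, on_primary, iv_name):
  """Mirror UpdateCache: write one line of 'owner state iv_name'."""
  state = "primary" if on_primary else "secondary"
  fdata = "%s %s %s\n" % (owner, state, iv_name or "not_visible")
  if not os.path.isdir(CACHE_DIR):
    os.makedirs(CACHE_DIR)
  cache_file = open(convert_path(dev_path), "w")
  try:
    cache_file.write(fdata)
  finally:
    cache_file.close()

if __name__ == "__main__":
  dev = "/dev/xenvg/instance1.example.com-sda"   # invented example device
  update_cache(dev, "instance1.example.com", True, "sda")
  # Writes /tmp/bdev-cache/bdev_xenvg_instance1.example.com-sda containing:
  #   instance1.example.com primary sda
  print(open(convert_path(dev)).read())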