X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/5d0fe286222efaa68c41e5226803f8281cb33d81..9388dc6da96f98744b50e263d72667f2c38db060:/lib/backend.py diff --git a/lib/backend.py b/lib/backend.py index 2719ab3..406a7f9 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -122,7 +122,6 @@ def LeaveCluster(): if os.path.isfile(full_name) and not os.path.islink(full_name): utils.RemoveFile(full_name) - try: priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS) except errors.OpExecError, err: @@ -211,19 +210,31 @@ def GetVolumeList(vg_name): """Compute list of logical volumes and their size. Returns: - dictionary of all partions (key) with their size: - test1: 20.06MiB + dictionary of all partions (key) with their size (in MiB), inactive + and online status: + {'test1': ('20.06', True, True)} """ - result = utils.RunCmd(["lvs", "--noheadings", "--units=m", - "-oname,size", vg_name]) + lvs = {} + sep = '|' + result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", + "--separator=%s" % sep, + "-olv_name,lv_size,lv_attr", vg_name]) if result.failed: logger.Error("Failed to list logical volumes, lvs output: %s" % result.output) - return {} + return result.output - lvlist = [line.split() for line in result.output.splitlines()] - return dict(lvlist) + for line in result.stdout.splitlines(): + line = line.strip().rstrip(sep) + name, size, attr = line.split(sep) + if len(attr) != 6: + attr = '------' + inactive = attr[4] == '-' + online = attr[5] == 'o' + lvs[name] = (size, inactive, online) + + return lvs def ListVolumeGroups(): @@ -262,7 +273,7 @@ def NodeVolumes(): 'vg': line[3].strip(), } - return [map_line(line.split('|')) for line in result.output.splitlines()] + return [map_line(line.split('|')) for line in result.stdout.splitlines()] def BridgesExist(bridges_list): @@ -400,11 +411,10 @@ def AddOSToInstance(instance, os_disk, swap_disk): logfile) result = utils.RunCmd(command) - if result.failed: - logger.Error("os create command '%s' returned error: %s" + logger.Error("os create command '%s' returned error: %s, logfile: %s," " output: %s" % - (command, result.fail_reason, result.output)) + (command, result.fail_reason, logfile, result.output)) return False return True @@ -481,20 +491,32 @@ def _GetVGInfo(vg_name): vg_free is the free size of the volume group in MiB pv_count are the number of physical disks in that vg + If an error occurs during gathering of data, we return the same dict + with keys all set to None. + """ + retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"]) + retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings", "--nosuffix", "--units=m", "--separator=:", vg_name]) if retval.failed: errmsg = "volume group %s not present" % vg_name logger.Error(errmsg) - raise errors.LVMError(errmsg) - valarr = retval.stdout.strip().split(':') - retdic = { - "vg_size": int(round(float(valarr[0]), 0)), - "vg_free": int(round(float(valarr[1]), 0)), - "pv_count": int(valarr[2]), - } + return retdic + valarr = retval.stdout.strip().rstrip(':').split(':') + if len(valarr) == 3: + try: + retdic = { + "vg_size": int(round(float(valarr[0]), 0)), + "vg_free": int(round(float(valarr[1]), 0)), + "pv_count": int(valarr[2]), + } + except ValueError, err: + logger.Error("Fail to parse vgs output: %s" % str(err)) + else: + logger.Error("vgs output has the wrong number of fields (expected" + " three): %s" % str(valarr)) return retdic @@ -620,7 +642,7 @@ def RebootInstance(instance, reboot_type, extra_args): return True -def CreateBlockDevice(disk, size, on_primary, info): +def CreateBlockDevice(disk, size, owner, on_primary, info): """Creates a block device for an instance. Args: @@ -638,13 +660,11 @@ def CreateBlockDevice(disk, size, on_primary, info): clist = [] if disk.children: for child in disk.children: - crdev = _RecursiveAssembleBD(child, on_primary) + crdev = _RecursiveAssembleBD(child, owner, on_primary) if on_primary or disk.AssembleOnSecondary(): # we need the children open in case the device itself has to # be assembled crdev.Open() - else: - crdev.Close() clist.append(crdev) try: device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist) @@ -660,10 +680,16 @@ def CreateBlockDevice(disk, size, on_primary, info): raise ValueError("Can't create child device for %s, %s" % (disk, size)) if on_primary or disk.AssembleOnSecondary(): - device.Assemble() + if not device.Assemble(): + errorstring = "Can't assemble device after creation" + logger.Error(errorstring) + raise errors.BlockDeviceError("%s, very unusual event - check the node" + " daemon logs" % errorstring) device.SetSyncSpeed(constants.SYNC_SPEED) if on_primary or disk.OpenOnSecondary(): device.Open(force=True) + DevCacheManager.UpdateCache(device.dev_path, owner, + on_primary, disk.iv_name) device.SetInfo(info) @@ -686,7 +712,10 @@ def RemoveBlockDevice(disk): logger.Info("Can't attach to device %s in remove" % disk) rdev = None if rdev is not None: + r_path = rdev.dev_path result = rdev.Remove() + if result: + DevCacheManager.RemoveCache(r_path) else: result = True if disk.children: @@ -695,7 +724,7 @@ def RemoveBlockDevice(disk): return result -def _RecursiveAssembleBD(disk, as_primary): +def _RecursiveAssembleBD(disk, owner, as_primary): """Activate a block device for an instance. This is run on the primary and secondary nodes for an instance. @@ -714,8 +743,20 @@ def _RecursiveAssembleBD(disk, as_primary): """ children = [] if disk.children: + mcn = disk.ChildrenNeeded() + if mcn == -1: + mcn = 0 # max number of Nones allowed + else: + mcn = len(disk.children) - mcn # max number of Nones for chld_disk in disk.children: - children.append(_RecursiveAssembleBD(chld_disk, as_primary)) + try: + cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary) + except errors.BlockDeviceError, err: + if children.count(None) >= mcn: + raise + cdev = None + logger.Debug("Error in child activation: %s" % str(err)) + children.append(cdev) if as_primary or disk.AssembleOnSecondary(): r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children) @@ -723,14 +764,15 @@ def _RecursiveAssembleBD(disk, as_primary): result = r_dev if as_primary or disk.OpenOnSecondary(): r_dev.Open() - else: - r_dev.Close() + DevCacheManager.UpdateCache(r_dev.dev_path, owner, + as_primary, disk.iv_name) + else: result = True return result -def AssembleBlockDevice(disk, as_primary): +def AssembleBlockDevice(disk, owner, as_primary): """Activate a block device for an instance. This is a wrapper over _RecursiveAssembleBD. @@ -740,7 +782,7 @@ def AssembleBlockDevice(disk, as_primary): True for secondary nodes """ - result = _RecursiveAssembleBD(disk, as_primary) + result = _RecursiveAssembleBD(disk, owner, as_primary) if isinstance(result, bdev.BlockDev): result = result.dev_path return result @@ -759,7 +801,10 @@ def ShutdownBlockDevice(disk): """ r_dev = _RecursiveFindBD(disk) if r_dev is not None: + r_path = r_dev.dev_path result = r_dev.Shutdown() + if result: + DevCacheManager.RemoveCache(r_path) else: result = True if disk.children: @@ -793,12 +838,20 @@ def MirrorRemoveChildren(parent_cdev, new_cdevs): if parent_bdev is None: logger.Error("Can't find parent in remove children: %s" % parent_cdev) return False - new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs] - if new_bdevs.count(None) > 0: - logger.Error("Can't find some devices while removing children: %s %s" % - (new_cdevs, new_bdevs)) - return False - parent_bdev.RemoveChildren(new_bdevs) + devs = [] + for disk in new_cdevs: + rpath = disk.StaticDevPath() + if rpath is None: + bd = _RecursiveFindBD(disk) + if bd is None: + logger.Error("Can't find dynamic device %s while removing children" % + disk) + return False + else: + devs.append(bd.dev_path) + else: + devs.append(rpath) + parent_bdev.RemoveChildren(devs) return True @@ -861,8 +914,7 @@ def FindBlockDevice(disk): rbd = _RecursiveFindBD(disk) if rbd is None: return rbd - sync_p, est_t, is_degr = rbd.GetSyncStatus() - return rbd.dev_path, rbd.major, rbd.minor, sync_p, est_t, is_degr + return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus() def UploadFile(file_name, data, mode, uid, gid, atime, mtime): @@ -877,8 +929,11 @@ def UploadFile(file_name, data, mode, uid, gid, atime, mtime): file_name) return False - allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts", - constants.SSH_KNOWN_HOSTS_FILE] + allowed_files = [ + constants.CLUSTER_CONF_FILE, + constants.ETC_HOSTS, + constants.SSH_KNOWN_HOSTS_FILE, + ] allowed_files.extend(ssconf.SimpleStore().GetFileList()) if file_name not in allowed_files: logger.Error("Filename passed to UploadFile not in allowed" @@ -931,10 +986,10 @@ def _OSSearch(name, search_path=None): if search_path is None: search_path = constants.OS_SEARCH_PATH - for dir in search_path: - t_os_dir = os.path.sep.join([dir, name]) + for dir_name in search_path: + t_os_dir = os.path.sep.join([dir_name, name]) if os.path.isdir(t_os_dir): - return dir + return dir_name return None @@ -984,33 +1039,31 @@ def _OSOndiskVersion(name, os_dir): def DiagnoseOS(top_dirs=None): """Compute the validity for all OSes. - For each name in all the given top directories (if not given defaults i - to constants.OS_SEARCH_PATH it will return an object. If this is a valid - os, the object will be an instance of the object.OS class. If not, - it will be an instance of errors.InvalidOS and this signifies that - this name does not correspond to a valid OS. + Returns an OS object for each name in all the given top directories + (if not given defaults to constants.OS_SEARCH_PATH) Returns: - list of objects + list of OS objects """ if top_dirs is None: top_dirs = constants.OS_SEARCH_PATH result = [] - for dir in top_dirs: - if os.path.isdir(dir): + for dir_name in top_dirs: + if os.path.isdir(dir_name): try: - f_names = utils.ListVisibleFiles(dir) + f_names = utils.ListVisibleFiles(dir_name) except EnvironmentError, err: - logger.Error("Can't list the OS directory %s: %s" % (dir,str(err))) + logger.Error("Can't list the OS directory %s: %s" % + (dir_name, str(err))) break for name in f_names: try: - os_inst = OSFromDisk(name, base_dir=dir) + os_inst = OSFromDisk(name, base_dir=dir_name) result.append(os_inst) except errors.InvalidOS, err: - result.append(err) + result.append(objects.OS.FromInvalidOS(err)) return result @@ -1064,7 +1117,7 @@ def OSFromDisk(name, base_dir=None): script) - return objects.OS(name=name, path=os_dir, + return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS, create_script=os_scripts['create'], export_script=os_scripts['export'], import_script=os_scripts['import'], @@ -1104,7 +1157,7 @@ def SnapshotBlockDevice(disk): return None else: raise errors.ProgrammerError("Cannot snapshot non-lvm block device" - "'%s' of type '%s'" % + " '%s' of type '%s'" % (disk.unique_id, disk.dev_type)) @@ -1199,6 +1252,7 @@ def FinalizeExport(instance, snap_disks): config.set(constants.INISECT_INS, 'nic%d_mac' % nic_count, '%s' % nic.mac) config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip) + config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge) # TODO: redundant: on load can read nics until it doesn't exist config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count) @@ -1356,7 +1410,16 @@ def RenameBlockDevices(devlist): result = False continue try: + old_rpath = dev.dev_path dev.Rename(unique_id) + new_rpath = dev.dev_path + if old_rpath != new_rpath: + DevCacheManager.RemoveCache(old_rpath) + # FIXME: we should add the new cache information here, like: + # DevCacheManager.UpdateCache(new_rpath, owner, ...) + # but we don't have the owner here - maybe parse from existing + # cache? for now, we only lose lvm data when we rename, which + # is less critical than DRBD or MD except errors.BlockDeviceError, err: logger.Error("Can't rename device '%s' to '%s': %s" % (dev, unique_id, err)) @@ -1404,7 +1467,7 @@ class HooksRunner(object): fdstdin = open("/dev/null", "r") child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True, - shell=False, cwd="/",env=env) + shell=False, cwd="/", env=env) output = "" try: output = child.stdout.read(4096) @@ -1473,3 +1536,62 @@ class HooksRunner(object): rr.append(("%s/%s" % (subdir, relname), rrval, output)) return rr + + +class DevCacheManager(object): + """Simple class for managing a chache of block device information. + + """ + _DEV_PREFIX = "/dev/" + _ROOT_DIR = constants.BDEV_CACHE_DIR + + @classmethod + def _ConvertPath(cls, dev_path): + """Converts a /dev/name path to the cache file name. + + This replaces slashes with underscores and strips the /dev + prefix. It then returns the full path to the cache file + + """ + if dev_path.startswith(cls._DEV_PREFIX): + dev_path = dev_path[len(cls._DEV_PREFIX):] + dev_path = dev_path.replace("/", "_") + fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path) + return fpath + + @classmethod + def UpdateCache(cls, dev_path, owner, on_primary, iv_name): + """Updates the cache information for a given device. + + """ + if dev_path is None: + logger.Error("DevCacheManager.UpdateCache got a None dev_path") + return + fpath = cls._ConvertPath(dev_path) + if on_primary: + state = "primary" + else: + state = "secondary" + if iv_name is None: + iv_name = "not_visible" + fdata = "%s %s %s\n" % (str(owner), state, iv_name) + try: + utils.WriteFile(fpath, data=fdata) + except EnvironmentError, err: + logger.Error("Can't update bdev cache for %s, error %s" % + (dev_path, str(err))) + + @classmethod + def RemoveCache(cls, dev_path): + """Remove data for a dev_path. + + """ + if dev_path is None: + logger.Error("DevCacheManager.RemoveCache got a None dev_path") + return + fpath = cls._ConvertPath(dev_path) + try: + utils.RemoveFile(fpath) + except EnvironmentError, err: + logger.Error("Can't update bdev cache for %s, error %s" % + (dev_path, str(err)))