X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/489fcbe9d69b4d1d32a82d644d914039d7dfeff0..4fe80ef2ed1cda3a6357274eccafe5c1f21a5283:/lib/backend.py?ds=sidebyside diff --git a/lib/backend.py b/lib/backend.py index 48d0dc8..44e45f6 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -19,7 +19,12 @@ # 02110-1301, USA. -"""Functions used by the node daemon""" +"""Functions used by the node daemon + +@var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in + the L{UploadFile} function + +""" import os @@ -115,6 +120,23 @@ def _CleanDirectory(path, exclude=None): utils.RemoveFile(full_name) +def _BuildUploadFileList(): + """Build the list of allowed upload files. + + This is abstracted so that it's built only once at module import time. + + """ + return frozenset([ + constants.CLUSTER_CONF_FILE, + constants.ETC_HOSTS, + constants.SSH_KNOWN_HOSTS_FILE, + constants.VNC_PASSWORD_FILE, + ]) + + +_ALLOWED_UPLOAD_FILES = _BuildUploadFileList() + + def JobQueuePurge(): """Removes job queue files and archived jobs. @@ -141,13 +163,13 @@ def GetMasterInfo(): master_netdev = cfg.GetMasterNetdev() master_ip = cfg.GetMasterIP() master_node = cfg.GetMasterNode() - except errors.ConfigurationError, err: + except errors.ConfigurationError: logging.exception("Cluster configuration incomplete") return (None, None, None) return (master_netdev, master_ip, master_node) -def StartMaster(start_daemons): +def StartMaster(start_daemons, no_voting): """Activate local node as master node. The function will always try activate the IP address of the master @@ -157,6 +179,9 @@ def StartMaster(start_daemons): @type start_daemons: boolean @param start_daemons: whther to also start the master daemons (ganeti-masterd and ganeti-rapi) + @type no_voting: boolean + @param no_voting: whether to start ganeti-masterd without a node vote + (if start_daemons is True), but still non-interactively @rtype: None """ @@ -186,8 +211,17 @@ def StartMaster(start_daemons): # and now start the master and rapi daemons if start_daemons: - for daemon in 'ganeti-masterd', 'ganeti-rapi': - result = utils.RunCmd([daemon]) + daemons_params = { + 'ganeti-masterd': [], + 'ganeti-rapi': [], + } + if no_voting: + daemons_params['ganeti-masterd'].append('--no-voting') + daemons_params['ganeti-masterd'].append('--yes-do-it') + for daemon in daemons_params: + cmd = [daemon] + cmd.extend(daemons_params[daemon]) + result = utils.RunCmd(cmd) if result.failed: logging.error("Can't start daemon %s: %s", daemon, result.output) ok = False @@ -308,7 +342,7 @@ def LeaveCluster(): def GetNodeInfo(vgname, hypervisor_type): - """Gives back a hash with different informations about the node. + """Gives back a hash with different information about the node. @type vgname: C{string} @param vgname: the name of the volume group to ask for disk space information @@ -437,9 +471,9 @@ def VerifyNode(what, cluster_name): if constants.NV_DRBDLIST in what: try: used_minors = bdev.DRBD8.GetUsedDevs().keys() - except errors.BlockDeviceError: + except errors.BlockDeviceError, err: logging.warning("Can't get used minors list", exc_info=True) - used_minors = [] + used_minors = str(err) result[constants.NV_DRBDLIST] = used_minors return result @@ -573,16 +607,15 @@ def GetInstanceList(hypervisor_list): try: names = hypervisor.GetHypervisor(hname).ListInstances() results.extend(names) - except errors.HypervisorError, err: + except errors.HypervisorError: logging.exception("Error enumerating instances for hypevisor %s", hname) - # FIXME: should we somehow not propagate this to the master? raise return results def GetInstanceInfo(instance, hname): - """Gives back the informations about an instance as a dictionary. + """Gives back the information about an instance as a dictionary. @type instance: string @param instance: the instance name @@ -661,9 +694,14 @@ def GetAllInstancesInfo(hypervisor_list): 'state': state, 'time': times, } - if name in output and output[name] != value: - raise errors.HypervisorError("Instance %s running duplicate" - " with different parameters" % name) + if name in output: + # we only check static parameters, like memory and vcpus, + # and not state and time which can change between the + # invocations of the different hypervisors + for key in 'memory', 'vcpus': + if value[key] != output[name][key]: + raise errors.HypervisorError("Instance %s is running twice" + " with different parameters" % name) output[name] = value return output @@ -742,7 +780,7 @@ def RunRenameInstance(instance, old_name): def _GetVGInfo(vg_name): - """Get informations about the volume group. + """Get information about the volume group. @type vg_name: str @param vg_name: the volume group which we query @@ -857,7 +895,7 @@ def _GatherAndLinkBlockDevs(instance): return block_devices -def StartInstance(instance, extra_args): +def StartInstance(instance): """Start an instance. @type instance: L{objects.Instance} @@ -874,7 +912,7 @@ def StartInstance(instance, extra_args): try: block_devices = _GatherAndLinkBlockDevs(instance) hyper = hypervisor.GetHypervisor(instance.hypervisor) - hyper.StartInstance(instance, block_devices, extra_args) + hyper.StartInstance(instance, block_devices) except errors.BlockDeviceError, err: logging.exception("Failed to start instance") return (False, "Block device error: %s" % str(err)) @@ -886,7 +924,7 @@ def StartInstance(instance, extra_args): return (True, "Instance started successfully") -def ShutdownInstance(instance): +def InstanceShutdown(instance): """Shut an instance down. @note: this functions uses polling with a hardcoded timeout. @@ -901,19 +939,20 @@ def ShutdownInstance(instance): running_instances = GetInstanceList([hv_name]) if instance.name not in running_instances: - return True + return (True, "Instance already stopped") hyper = hypervisor.GetHypervisor(hv_name) try: hyper.StopInstance(instance) except errors.HypervisorError, err: - logging.error("Failed to stop instance: %s" % err) - return False + msg = "Failed to stop instance %s: %s" % (instance.name, err) + logging.error(msg) + return (False, msg) # test every 10secs for 2min time.sleep(1) - for dummy in range(11): + for _ in range(11): if instance.name not in GetInstanceList([hv_name]): break time.sleep(10) @@ -925,21 +964,23 @@ def ShutdownInstance(instance): try: hyper.StopInstance(instance, force=True) except errors.HypervisorError, err: - logging.exception("Failed to stop instance: %s" % err) - return False + msg = "Failed to force stop instance %s: %s" % (instance.name, err) + logging.error(msg) + return (False, msg) time.sleep(1) if instance.name in GetInstanceList([hv_name]): - logging.error("Could not shutdown instance '%s' even by destroy", - instance.name) - return False + msg = ("Could not shutdown instance %s even by destroy" % + instance.name) + logging.error(msg) + return (False, msg) _RemoveBlockDevLinks(instance.name, instance.disks) - return True + return (True, "Instance has been shutdown successfully") -def InstanceReboot(instance, reboot_type, extra_args): +def InstanceReboot(instance, reboot_type): """Reboot an instance. @type instance: L{objects.Instance} @@ -951,9 +992,10 @@ def InstanceReboot(instance, reboot_type, extra_args): instance OS, do not recreate the VM - L{constants.INSTANCE_REBOOT_HARD}: tear down and restart the VM (at the hypervisor level) - - the other reboot type (L{constants.INSTANCE_REBOOT_HARD}) - is not accepted here, since that mode is handled - differently + - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is + not accepted here, since that mode is handled differently, in + cmdlib, and translates into full stop and start of the + instance (instead of a call_instance_reboot RPC) @rtype: boolean @return: the success of the operation @@ -975,8 +1017,10 @@ def InstanceReboot(instance, reboot_type, extra_args): return (False, msg) elif reboot_type == constants.INSTANCE_REBOOT_HARD: try: - ShutdownInstance(instance) - StartInstance(instance, extra_args) + stop_result = InstanceShutdown(instance) + if not stop_result[0]: + return stop_result + return StartInstance(instance) except errors.HypervisorError, err: msg = "Failed to hard reboot instance %s: %s" % (instance.name, err) logging.error(msg) @@ -1022,7 +1066,7 @@ def AcceptInstance(instance, info, target): msg = "Failed to accept instance" logging.exception(msg) return (False, '%s: %s' % (msg, err)) - return (True, "Accept successfull") + return (True, "Accept successful") def FinalizeMigration(instance, info, success): @@ -1070,7 +1114,7 @@ def MigrateInstance(instance, target, live): msg = "Failed to migrate instance" logging.exception(msg) return (False, "%s: %s" % (msg, err)) - return (True, "Migration successfull") + return (True, "Migration successful") def BlockdevCreate(disk, size, owner, on_primary, info): @@ -1115,7 +1159,7 @@ def BlockdevCreate(disk, size, owner, on_primary, info): clist.append(crdev) try: - device = bdev.Create(disk.dev_type, disk.physical_id, clist, size) + device = bdev.Create(disk.dev_type, disk.physical_id, clist, disk.size) except errors.BlockDeviceError, err: return False, "Can't create block device: %s" % str(err) @@ -1225,7 +1269,7 @@ def _RecursiveAssembleBD(disk, owner, as_primary): children.append(cdev) if as_primary or disk.AssembleOnSecondary(): - r_dev = bdev.Assemble(disk.dev_type, disk.physical_id, children) + r_dev = bdev.Assemble(disk.dev_type, disk.physical_id, children, disk.size) r_dev.SetSyncSpeed(constants.SYNC_SPEED) result = r_dev if as_primary or disk.OpenOnSecondary(): @@ -1263,7 +1307,7 @@ def BlockdevAssemble(disk, owner, as_primary): def BlockdevShutdown(disk): """Shut down a block device. - First, if the device is assembled (Attach() is successfull), then + First, if the device is assembled (Attach() is successful), then the device is shutdown. Then the children of the device are shutdown. @@ -1381,7 +1425,7 @@ def BlockdevGetmirrorstatus(disks): def _RecursiveFindBD(disk): """Check if a device is activated. - If so, return informations about the real device. + If so, return information about the real device. @type disk: L{objects.Disk} @param disk: the disk object we need to find @@ -1395,13 +1439,13 @@ def _RecursiveFindBD(disk): for chdisk in disk.children: children.append(_RecursiveFindBD(chdisk)) - return bdev.FindDevice(disk.dev_type, disk.physical_id, children) + return bdev.FindDevice(disk.dev_type, disk.physical_id, children, disk.size) def BlockdevFind(disk): """Check if a device is activated. - If it is, return informations about the real device. + If it is, return information about the real device. @type disk: L{objects.Disk} @param disk: the disk to find @@ -1420,6 +1464,32 @@ def BlockdevFind(disk): return (True, (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()) +def BlockdevGetsize(disks): + """Computes the size of the given disks. + + If a disk is not found, returns None instead. + + @type disks: list of L{objects.Disk} + @param disks: the list of disk to compute the size for + @rtype: list + @return: list with elements None if the disk cannot be found, + otherwise the size + + """ + result = [] + for cf in disks: + try: + rbd = _RecursiveFindBD(cf) + except errors.BlockDeviceError, err: + result.append(None) + continue + if rbd is None: + result.append(None) + else: + result.append(rbd.GetActualSize()) + return result + + def UploadFile(file_name, data, mode, uid, gid, atime, mtime): """Write a file to the filesystem. @@ -1450,14 +1520,7 @@ def UploadFile(file_name, data, mode, uid, gid, atime, mtime): file_name) return False - allowed_files = [ - constants.CLUSTER_CONF_FILE, - constants.ETC_HOSTS, - constants.SSH_KNOWN_HOSTS_FILE, - constants.VNC_PASSWORD_FILE, - ] - - if file_name not in allowed_files: + if file_name not in _ALLOWED_UPLOAD_FILES: logging.error("Filename passed to UploadFile not in allowed" " upload targets: '%s'", file_name) return False @@ -1663,7 +1726,6 @@ def OSEnvironment(instance, debug=0): str(disk)) real_disk.Open() result['DISK_%d_PATH' % idx] = real_disk.dev_path - # FIXME: When disks will have read-only mode, populate this result['DISK_%d_ACCESS' % idx] = disk.mode if constants.HV_DISK_TYPE in instance.hvparams: result['DISK_%d_FRONTEND_TYPE' % idx] = \ @@ -1682,6 +1744,10 @@ def OSEnvironment(instance, debug=0): result['NIC_%d_FRONTEND_TYPE' % idx] = \ instance.hvparams[constants.HV_NIC_TYPE] + for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]: + for key, value in source.items(): + result["INSTANCE_%s_%s" % (kind, key)] = str(value) + return result def BlockdevGrow(disk, amount): @@ -1787,8 +1853,8 @@ def ExportSnapshot(disk, dest_node, instance, cluster_name, idx): # the target command is built out of three individual commands, # which are joined by pipes; we check each individual command for # valid parameters - expcmd = utils.BuildShellCmd("cd %s; %s 2>%s", inst_os.path, - export_script, logfile) + expcmd = utils.BuildShellCmd("set -e; set -o pipefail; cd %s; %s 2>%s", + inst_os.path, export_script, logfile) comprcmd = "gzip" @@ -1801,7 +1867,7 @@ def ExportSnapshot(disk, dest_node, instance, cluster_name, idx): # all commands have been checked, so we're safe to combine them command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)]) - result = utils.RunCmd(command, env=export_env) + result = utils.RunCmd(["bash", "-c", command], env=export_env) if result.failed: logging.error("os snapshot export command '%s' returned error: %s" @@ -2011,7 +2077,7 @@ def BlockdevRename(devlist): # but we don't have the owner here - maybe parse from existing # cache? for now, we only lose lvm data when we rename, which # is less critical than DRBD or MD - except errors.BlockDeviceError, err: + except errors.BlockDeviceError: logging.exception("Can't rename device '%s' to '%s'", dev, unique_id) result = False return result @@ -2081,7 +2147,7 @@ def RemoveFileStorageDir(file_storage_dir): @param file_storage_dir: the directory we should cleanup @rtype: tuple (success,) @return: tuple of one element, C{success}, denoting - whether the operation was successfull + whether the operation was successful """ file_storage_dir = _TransformFileStorageDir(file_storage_dir) @@ -2096,7 +2162,7 @@ def RemoveFileStorageDir(file_storage_dir): # deletes dir only if empty, otherwise we want to return False try: os.rmdir(file_storage_dir) - except OSError, err: + except OSError: logging.exception("Cannot remove file storage directory '%s'", file_storage_dir) result = False, @@ -2125,7 +2191,7 @@ def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir): if os.path.isdir(old_file_storage_dir): try: os.rename(old_file_storage_dir, new_file_storage_dir) - except OSError, err: + except OSError: logging.exception("Cannot rename '%s' to '%s'", old_file_storage_dir, new_file_storage_dir) result = False, @@ -2295,7 +2361,8 @@ def DemoteFromMC(): if utils.IsProcessAlive(utils.ReadPidFile(pid_file)): return (False, "The master daemon is running, will not demote") try: - utils.CreateBackup(constants.CLUSTER_CONF_FILE) + if os.path.isfile(constants.CLUSTER_CONF_FILE): + utils.CreateBackup(constants.CLUSTER_CONF_FILE) except EnvironmentError, err: if err.errno != errno.ENOENT: return (False, "Error while backing up cluster file: %s" % str(err)) @@ -2380,7 +2447,7 @@ def DrbdAttachNet(nodes_ip, disks, instance_name, multimaster): # standalone, even though this should not happen with the # new staged way of changing disk configs try: - rd.ReAttachNet(multimaster) + rd.AttachNet(multimaster) except errors.BlockDeviceError, err: return (False, "Can't change network configuration: %s" % str(err)) if all_connected: @@ -2432,8 +2499,6 @@ class HooksRunner(object): on the master side. """ - RE_MASK = re.compile("^[a-zA-Z0-9_-]+$") - def __init__(self, hooks_base_dir=None): """Constructor for hooks runner. @@ -2530,7 +2595,7 @@ class HooksRunner(object): dir_name = "%s/%s" % (self._BASE_DIR, subdir) try: dir_contents = utils.ListVisibleFiles(dir_name) - except OSError, err: + except OSError: # FIXME: must log output in case of failures return rr @@ -2540,7 +2605,7 @@ class HooksRunner(object): for relname in dir_contents: fname = os.path.join(dir_name, relname) if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and - self.RE_MASK.match(relname) is not None): + constants.EXT_PLUGIN_MASK.match(relname) is not None): rrval = constants.HKR_SKIP output = "" else: @@ -2653,7 +2718,7 @@ class DevCacheManager(object): fdata = "%s %s %s\n" % (str(owner), state, iv_name) try: utils.WriteFile(fpath, data=fdata) - except EnvironmentError, err: + except EnvironmentError: logging.exception("Can't update bdev cache for %s", dev_path) @classmethod @@ -2675,5 +2740,5 @@ class DevCacheManager(object): fpath = cls._ConvertPath(dev_path) try: utils.RemoveFile(fpath) - except EnvironmentError, err: + except EnvironmentError: logging.exception("Can't update bdev cache for %s", dev_path)