X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/bd45767bf3d087eb3eebb5a55233d5fdfd5fa07f..1094acda35687a23ef68aa592e5428dd882c76fa:/lib/cmdlib.py

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index f69365e..ea284e1 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -1381,9 +1381,11 @@ class LURenameCluster(LogicalUnit):
       result = self.rpc.call_upload_file(node_list,
                                          constants.SSH_KNOWN_HOSTS_FILE)
       for to_node, to_result in result.iteritems():
-        if to_result.failed or not to_result.data:
-          logging.error("Copy of file %s to node %s failed",
-                        constants.SSH_KNOWN_HOSTS_FILE, to_node)
+        msg = to_result.RemoteFailMsg()
+        if msg:
+          msg = ("Copy of file %s to node %s failed: %s" %
+                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
+          self.proc.LogWarning(msg)
 
     finally:
       result = self.rpc.call_node_start_master(master, False)
@@ -1417,7 +1419,7 @@ class LUSetClusterParams(LogicalUnit):
   _OP_REQP = []
   REQ_BGL = False
 
-  def CheckParameters(self):
+  def CheckArguments(self):
     """Check parameters
 
     """
@@ -1426,7 +1428,7 @@ class LUSetClusterParams(LogicalUnit):
     if self.op.candidate_pool_size is not None:
       try:
         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
-      except ValueError, err:
+      except (ValueError, TypeError), err:
         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                    str(err))
       if self.op.candidate_pool_size < 1:
@@ -1484,14 +1486,20 @@ class LUSetClusterParams(LogicalUnit):
                                      (node, vgstatus))
 
     self.cluster = cluster = self.cfg.GetClusterInfo()
-    # validate beparams changes
+    # validate params changes
     if self.op.beparams:
       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
-      self.new_beparams = cluster.FillDict(
-        cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
+      self.new_beparams = objects.FillDict(
+        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
+
+    if self.op.nicparams:
+      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
+      self.new_nicparams = objects.FillDict(
+        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
+      objects.NIC.CheckParameterSyntax(self.new_nicparams)
 
     # hypervisor list/parameters
-    self.new_hvparams = cluster.FillDict(cluster.hvparams, {})
+    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
     if self.op.hvparams:
       if not isinstance(self.op.hvparams, dict):
         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
@@ -1523,8 +1531,11 @@ class LUSetClusterParams(LogicalUnit):
 
     """
     if self.op.vg_name is not None:
-      if self.op.vg_name != self.cfg.GetVGName():
-        self.cfg.SetVGName(self.op.vg_name)
+      new_volume = self.op.vg_name
+      if not new_volume:
+        new_volume = None
+      if new_volume != self.cfg.GetVGName():
+        self.cfg.SetVGName(new_volume)
       else:
         feedback_fn("Cluster LVM configuration already in desired"
                     " state, not changing")
@@ -1533,7 +1544,10 @@ class LUSetClusterParams(LogicalUnit):
     if self.op.enabled_hypervisors is not None:
       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
     if self.op.beparams:
-      self.cluster.beparams[constants.BEGR_DEFAULT] = self.new_beparams
+      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
+    if self.op.nicparams:
+      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
+
     if self.op.candidate_pool_size is not None:
       self.cluster.candidate_pool_size = self.op.candidate_pool_size
 
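The parameter hunks above move from the per-object cluster.FillDict helper to a module-level objects.FillDict and key the cluster defaults under constants.PP_DEFAULT. The merge it relies on is a plain defaults-then-overrides copy; a minimal sketch, assuming FillDict behaves that way (the dictionaries below are invented for illustration, not taken from the tree):

    import copy

    def fill_dict(defaults, custom):
      """Return a copy of 'defaults' with the keys from 'custom' applied on top."""
      filled = copy.deepcopy(defaults)
      filled.update(custom)
      return filled

    cluster_beparams = {"memory": 128, "vcpus": 1, "auto_balance": True}
    op_beparams = {"memory": 512}  # what the user passed in the opcode
    new_beparams = fill_dict(cluster_beparams, op_beparams)
    # -> {"memory": 512, "vcpus": 1, "auto_balance": True}
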
@@ -1545,6 +1559,48 @@ class LUSetClusterParams(LogicalUnit):
       _AdjustCandidatePool(self)
 
+
+def _RedistributeAncillaryFiles(lu, additional_nodes=None):
+  """Distribute additional files which are part of the cluster configuration.
+
+  ConfigWriter takes care of distributing the config and ssconf files, but
+  there are more files which should be distributed to all nodes. This function
+  makes sure those are copied.
+
+  @param lu: calling logical unit
+  @param additional_nodes: list of nodes not in the config to distribute to
+
+  """
+  # 1. Gather target nodes
+  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
+  dist_nodes = lu.cfg.GetNodeList()
+  if additional_nodes is not None:
+    dist_nodes.extend(additional_nodes)
+  if myself.name in dist_nodes:
+    dist_nodes.remove(myself.name)
+  # 2. Gather files to distribute
+  dist_files = set([constants.ETC_HOSTS,
+                    constants.SSH_KNOWN_HOSTS_FILE,
+                    constants.RAPI_CERT_FILE,
+                    constants.RAPI_USERS_FILE,
+                   ])
+
+  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
+  for hv_name in enabled_hypervisors:
+    hv_class = hypervisor.GetHypervisor(hv_name)
+    dist_files.update(hv_class.GetAncillaryFiles())
+
+  # 3. Perform the files upload
+  for fname in dist_files:
+    if os.path.exists(fname):
+      result = lu.rpc.call_upload_file(dist_nodes, fname)
+      for to_node, to_result in result.items():
+        msg = to_result.RemoteFailMsg()
+        if msg:
+          msg = ("Copy of file %s to node %s failed: %s" %
+                 (fname, to_node, msg))
+          lu.proc.LogWarning(msg)
+
+
 class LURedistributeConfig(NoHooksLU):
   """Force the redistribution of cluster configuration.
 
@@ -1570,6 +1626,7 @@ class LURedistributeConfig(NoHooksLU):
 
     """
     self.cfg.Update(self.cfg.GetClusterInfo())
+    _RedistributeAncillaryFiles(self)
 
 
 def _WaitForSync(lu, instance, oneshot=False, unlock=False):
@@ -2217,7 +2274,8 @@ class LUAddNode(LogicalUnit):
                                " new node: %s" % msg)
 
     # Add node to our /etc/hosts, and add key to known_hosts
-    utils.AddHostToEtcHosts(new_node.name)
+    if self.cfg.GetClusterInfo().modify_etc_hosts:
+      utils.AddHostToEtcHosts(new_node.name)
 
     if new_node.secondary_ip != new_node.primary_ip:
       result = self.rpc.call_node_has_ip_address(new_node.name,
@@ -2245,35 +2303,11 @@ class LUAddNode(LogicalUnit):
                         (verifier, result[verifier].data['nodelist'][failed]))
           raise errors.OpExecError("ssh/hostname verification failed.")
 
-    # Distribute updated /etc/hosts and known_hosts to all nodes,
-    # including the node just added
-    myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
-    dist_nodes = self.cfg.GetNodeList()
-    if not self.op.readd:
-      dist_nodes.append(node)
-    if myself.name in dist_nodes:
-      dist_nodes.remove(myself.name)
-
-    logging.debug("Copying hosts and known_hosts to all nodes")
-    for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
-      result = self.rpc.call_upload_file(dist_nodes, fname)
-      for to_node, to_result in result.iteritems():
-        if to_result.failed or not to_result.data:
-          logging.error("Copy of file %s to node %s failed", fname, to_node)
-
-    to_copy = []
-    enabled_hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
-    if constants.HTS_COPY_VNC_PASSWORD.intersection(enabled_hypervisors):
-      to_copy.append(constants.VNC_PASSWORD_FILE)
-
-    for fname in to_copy:
-      result = self.rpc.call_upload_file([node], fname)
-      if result[node].failed or not result[node]:
-        logging.error("Could not copy file %s to node %s", fname, node)
-
     if self.op.readd:
+      _RedistributeAncillaryFiles(self)
       self.context.ReaddNode(new_node)
     else:
+      _RedistributeAncillaryFiles(self, additional_nodes=[node])
       self.context.AddNode(new_node)
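The add-node path no longer hard-codes which extra files (the VNC password file and friends) are pushed to new nodes; _RedistributeAncillaryFiles instead asks every enabled hypervisor for its own list through GetAncillaryFiles(). A rough sketch of that hook, assuming it is a classmethod returning a static list; the class name and file path below are hypothetical and only show the shape of the interface:

    class BaseHypervisor(object):
      ANCILLARY_FILES = []

      @classmethod
      def GetAncillaryFiles(cls):
        """Files (besides config/ssconf) this hypervisor needs on every node."""
        return cls.ANCILLARY_FILES


    class FakeVncHypervisor(BaseHypervisor):
      # hypothetical path, purely illustrative
      ANCILLARY_FILES = ["/etc/ganeti/vnc-cluster-password"]
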
@@ -2404,6 +2438,51 @@ class LUSetNodeParams(LogicalUnit):
     return result
 
 
+class LUPowercycleNode(NoHooksLU):
+  """Powercycles a node.
+
+  """
+  _OP_REQP = ["node_name", "force"]
+  REQ_BGL = False
+
+  def CheckArguments(self):
+    node_name = self.cfg.ExpandNodeName(self.op.node_name)
+    if node_name is None:
+      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
+    self.op.node_name = node_name
+    if node_name == self.cfg.GetMasterNode() and not self.op.force:
+      raise errors.OpPrereqError("The node is the master and the force"
+                                 " parameter was not set")
+
+  def ExpandNames(self):
+    """Locking for PowercycleNode.
+
+    This is a last-resource option and shouldn't block on other
+    jobs. Therefore, we grab no locks.
+
+    """
+    self.needed_locks = {}
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This LU has no prereqs.
+
+    """
+    pass
+
+  def Exec(self, feedback_fn):
+    """Reboots a node.
+
+    """
+    result = self.rpc.call_node_powercycle(self.op.node_name,
+                                           self.cfg.GetHypervisorType())
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Failed to schedule the reboot: %s" % msg)
+    return result.payload
+
+
 class LUQueryClusterInfo(NoHooksLU):
   """Query cluster configuration.
 
@@ -2439,7 +2518,12 @@ class LUQueryClusterInfo(NoHooksLU):
       "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
                         for hypervisor in cluster.enabled_hypervisors]),
       "beparams": cluster.beparams,
+      "nicparams": cluster.nicparams,
       "candidate_pool_size": cluster.candidate_pool_size,
+      "default_bridge": cluster.default_bridge,
+      "master_netdev": cluster.master_netdev,
+      "volume_group_name": cluster.volume_group_name,
+      "file_storage_dir": cluster.file_storage_dir,
       }
 
     return result
@@ -2754,15 +2838,48 @@ class LUStartupInstance(LogicalUnit):
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
 
+    # extra beparams
+    self.beparams = getattr(self.op, "beparams", {})
+    if self.beparams:
+      if not isinstance(self.beparams, dict):
+        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
+                                   " dict" % (type(self.beparams), ))
+      # fill the beparams dict
+      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
+      self.op.beparams = self.beparams
+
+    # extra hvparams
+    self.hvparams = getattr(self.op, "hvparams", {})
+    if self.hvparams:
+      if not isinstance(self.hvparams, dict):
+        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
+                                   " dict" % (type(self.hvparams), ))
+
+      # check hypervisor parameter syntax (locally)
+      cluster = self.cfg.GetClusterInfo()
+      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
+      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
+                                    instance.hvparams)
+      filled_hvp.update(self.hvparams)
+      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
+      hv_type.CheckParameterSyntax(filled_hvp)
+      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
+      self.op.hvparams = self.hvparams
+
     _CheckNodeOnline(self, instance.primary_node)
 
     bep = self.cfg.GetClusterInfo().FillBE(instance)
     # check bridges existance
     _CheckInstanceBridgesExist(self, instance)
 
-    _CheckNodeFreeMemory(self, instance.primary_node,
-                         "starting instance %s" % instance.name,
-                         bep[constants.BE_MEMORY], instance.hypervisor)
+    remote_info = self.rpc.call_instance_info(instance.primary_node,
+                                              instance.name,
+                                              instance.hypervisor)
+    remote_info.Raise()
+    if not remote_info.data:
+      _CheckNodeFreeMemory(self, instance.primary_node,
+                           "starting instance %s" % instance.name,
+                           bep[constants.BE_MEMORY], instance.hypervisor)
 
   def Exec(self, feedback_fn):
     """Start the instance.
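With this change the startup LU accepts one-shot beparams/hvparams for a single start. The effective hypervisor dict is layered: cluster defaults first, then the instance's stored hvparams, then the opcode overrides, and only the final result is syntax-checked. A toy illustration of the layering order with plain dicts (all values invented for the example):

    cluster_defaults = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
    instance_hvparams = {"root_path": "/dev/vda1"}
    op_hvparams = {"kernel_path": "/boot/vmlinuz-rescue"}  # one-off override

    filled_hvp = dict(cluster_defaults)
    filled_hvp.update(instance_hvparams)  # instance settings beat cluster defaults
    filled_hvp.update(op_hvparams)        # opcode values beat both, for this start only
    # -> {"kernel_path": "/boot/vmlinuz-rescue", "root_path": "/dev/vda1"}
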
@@ -2777,7 +2894,8 @@ class LUStartupInstance(LogicalUnit):
 
     _StartInstanceDisks(self, instance, force)
 
-    result = self.rpc.call_instance_start(node_current, instance)
+    result = self.rpc.call_instance_start(node_current, instance,
+                                          self.hvparams, self.beparams)
     msg = result.RemoteFailMsg()
     if msg:
       _ShutdownInstanceDisks(self, instance)
@@ -2859,7 +2977,7 @@ class LURebootInstance(LogicalUnit):
                                  " full reboot: %s" % msg)
       _ShutdownInstanceDisks(self, instance)
       _StartInstanceDisks(self, instance, ignore_secondaries)
-      result = self.rpc.call_instance_start(node_current, instance)
+      result = self.rpc.call_instance_start(node_current, instance, None, None)
       msg = result.RemoteFailMsg()
       if msg:
         _ShutdownInstanceDisks(self, instance)
@@ -2959,7 +3077,8 @@ class LUReinstallInstance(LogicalUnit):
     remote_info = self.rpc.call_instance_info(instance.primary_node,
                                               instance.name,
                                               instance.hypervisor)
-    if remote_info.failed or remote_info.data:
+    remote_info.Raise()
+    if remote_info.data:
       raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                  (self.op.instance_name,
                                   instance.primary_node))
@@ -2994,7 +3113,7 @@ class LUReinstallInstance(LogicalUnit):
     _StartInstanceDisks(self, inst, None)
     try:
       feedback_fn("Running the instance OS create scripts...")
-      result = self.rpc.call_instance_os_add(inst.primary_node, inst)
+      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
       msg = result.RemoteFailMsg()
       if msg:
         raise errors.OpExecError("Could not install OS for instance %s"
@@ -3549,7 +3668,7 @@ class LUFailoverInstance(LogicalUnit):
         raise errors.OpExecError("Can't activate the instance's disks")
 
       feedback_fn("* starting the instance on the target node")
-      result = self.rpc.call_instance_start(target_node, instance)
+      result = self.rpc.call_instance_start(target_node, instance, None, None)
       msg = result.RemoteFailMsg()
       if msg:
         _ShutdownInstanceDisks(self, instance)
@@ -4295,14 +4414,14 @@ class LUCreateInstance(LogicalUnit):
 
     # check hypervisor parameter syntax (locally)
     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
-    filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
+    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                   self.op.hvparams)
     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
     hv_type.CheckParameterSyntax(filled_hvp)
 
     # fill and remember the beparams dict
     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
-    self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
+    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                     self.op.beparams)
 
     #### instance parameters check
@@ -4762,7 +4881,7 @@ class LUCreateInstance(LogicalUnit):
     if iobj.disk_template != constants.DT_DISKLESS:
       if self.op.mode == constants.INSTANCE_CREATE:
         feedback_fn("* running the instance OS create scripts...")
-        result = self.rpc.call_instance_os_add(pnode_name, iobj)
+        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
         msg = result.RemoteFailMsg()
         if msg:
           raise errors.OpExecError("Could not add os for instance %s"
@@ -4793,7 +4912,7 @@ class LUCreateInstance(LogicalUnit):
       self.cfg.Update(iobj)
       logging.info("Starting instance %s on node %s", instance, pnode_name)
       feedback_fn("* starting instance...")
-      result = self.rpc.call_instance_start(pnode_name, iobj)
+      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
       msg = result.RemoteFailMsg()
      if msg:
         raise errors.OpExecError("Could not start instance: %s" % msg)
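Many hunks in this file replace the old result.Raise()/result.data inspection with RemoteFailMsg(), which yields an error string on failure and nothing on success, so the caller can choose between logging a warning and aborting. A toy model of the calling convention, not the real rpc.RpcResult class:

    class FakeRpcResult(object):
      """Illustrative stand-in for the RPC result objects used in this diff."""

      def __init__(self, error=None, payload=None):
        self.error = error
        self.payload = payload

      def RemoteFailMsg(self):
        # None or "" means the remote call succeeded; otherwise it is the error text
        return self.error


    result = FakeRpcResult(error="device is held open by another process")
    msg = result.RemoteFailMsg()
    if msg:
      raise RuntimeError("Can't detach drbd from local storage: %s" % msg)
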
@@ -5115,10 +5234,11 @@ class LUReplaceDisks(LogicalUnit):
     for dev, old_lvs, new_lvs in iv_names.itervalues():
       info("detaching %s drbd from local storage" % dev.iv_name)
       result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
-      result.Raise()
-      if not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
         raise errors.OpExecError("Can't detach drbd from local storage on node"
-                                 " %s for device %s" % (tgt_node, dev.iv_name))
+                                 " %s for device %s: %s" %
+                                 (tgt_node, dev.iv_name, msg))
       #dev.children = []
       #cfg.Update(instance)
 
@@ -5142,16 +5262,18 @@ class LUReplaceDisks(LogicalUnit):
       info("renaming the old LVs on the target node")
       result = self.rpc.call_blockdev_rename(tgt_node, rlist)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Can't rename old LVs on node %s: %s" %
+                                 (tgt_node, msg))
       # now we rename the new LVs to the old LVs
       info("renaming the new LVs on the target node")
       rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
       result = self.rpc.call_blockdev_rename(tgt_node, rlist)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Can't rename new LVs on node %s: %s" %
+                                 (tgt_node, msg))
 
       for old, new in zip(old_lvs, new_lvs):
         new.logical_id = old.logical_id
@@ -5164,13 +5286,14 @@ class LUReplaceDisks(LogicalUnit):
       # now that the new lvs have the old name, we can add them to the device
       info("adding new mirror component on %s" % tgt_node)
       result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
-      if result.failed or not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
         for new_lv in new_lvs:
           msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
           if msg:
             warning("Can't rollback device %s: %s", dev, msg,
                     hint="cleanup manually the unused logical volumes")
-        raise errors.OpExecError("Can't add local storage to drbd")
+        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
 
       dev.children = new_lvs
       cfg.Update(instance)
@@ -5850,7 +5973,7 @@ class LUSetInstanceParams(LogicalUnit):
           i_hvdict[key] = val
       cluster = self.cfg.GetClusterInfo()
       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
-      hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
+      hv_new = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                 i_hvdict)
       # local check
       hypervisor.GetHypervisor(
@@ -5874,7 +5997,7 @@ class LUSetInstanceParams(LogicalUnit):
           i_bedict[key] = val
       cluster = self.cfg.GetClusterInfo()
       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
-      be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
+      be_new = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                 i_bedict)
       self.be_new = be_new # the new actual values
       self.be_inst = i_bedict # the new dict (without defaults)
@@ -5897,7 +6020,7 @@ class LUSetInstanceParams(LogicalUnit):
         self.warn.append("Can't get info from primary node %s" % pnode)
       else:
         if not instance_info.failed and instance_info.data:
-          current_mem = instance_info.data['memory']
+          current_mem = int(instance_info.data['memory'])
         else:
           # Assume instance not running
           # (there is a slight race condition here, but it's not very probable,
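The int() cast above is presumably there because the hypervisor-specific instance_info call can hand the memory figure back as a string, and the memory-deficit arithmetic further down needs an integer; in Python 2 the subtraction would otherwise raise a TypeError. A short illustration of the failure mode the cast avoids (the reported dict is invented):

    reported = {"state": "running", "memory": "512"}  # values may come back as strings
    current_mem = int(reported["memory"])
    be_memory = 1024
    miss_mem = be_memory - current_mem  # without int() this line is a TypeError in Python 2
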
@@ -6241,7 +6364,7 @@ class LUExportInstance(LogicalUnit):
 
     finally:
       if self.op.shutdown and instance.admin_up:
-        result = self.rpc.call_instance_start(src_node, instance)
+        result = self.rpc.call_instance_start(src_node, instance, None, None)
         msg = result.RemoteFailMsg()
         if msg:
           _ShutdownInstanceDisks(self, instance)
@@ -6710,6 +6833,8 @@ class IAllocator(object):
           "disk_template": iinfo.disk_template,
           "hypervisor": iinfo.hypervisor,
           }
+        pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
+                                                   pir["disks"])
         instance_data[iinfo.name] = pir
 
     data["instances"] = instance_data
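The final hunk gives the IAllocator input a per-instance disk_space_total, derived from the disk template and the individual disk sizes by the existing _ComputeDiskSize helper. As an assumption about that helper rather than a quote of it: plain LVM sums the disk sizes, while DRBD-based templates add a fixed per-disk metadata allowance. A hedged stand-in:

    def compute_disk_space(disk_template, disks, drbd_meta_mb=128):
      """Illustrative stand-in for _ComputeDiskSize: total VG space an instance needs."""
      sizes = [d["size"] for d in disks]
      if disk_template == "plain":
        return sum(sizes)
      if disk_template == "drbd":
        return sum(s + drbd_meta_mb for s in sizes)  # per-disk metadata volume (assumed 128 MB)
      return 0  # diskless / file-based templates take no VG space

    compute_disk_space("drbd", [{"size": 10240}, {"size": 2048}])  # -> 12544
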