X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/f7c94c17b1ea35b8552bafa9718108be08334426..61413377c7a1c624c4a5f2c2ec55dbc3d8691896:/lib/cmdlib.py

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 4895576..782768d 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -1397,6 +1397,13 @@ class LUClusterVerify(LogicalUnit):
       _ErrorIf(test, self.ENODEHV, node,
                "hypervisor %s verify failure: '%s'", hv_name, hv_result)
 
+    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
+    if ninfo.vm_capable and isinstance(hvp_result, list):
+      for item, hv_name, hv_result in hvp_result:
+        _ErrorIf(True, self.ENODEHV, node,
+                 "hypervisor %s parameter verify failure (source %s): %s",
+                 hv_name, item, hv_result)
+
     test = nresult.get(constants.NV_NODESETUP,
                        ["Missing NODESETUP results"])
     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
@@ -2029,6 +2036,21 @@ class LUClusterVerify(LogicalUnit):
 
     return instdisk
 
+  def _VerifyHVP(self, hvp_data):
+    """Verifies locally the syntax of the hypervisor parameters.
+
+    """
+    for item, hv_name, hv_params in hvp_data:
+      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
+             (item, hv_name))
+      try:
+        hv_class = hypervisor.GetHypervisor(hv_name)
+        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
+        hv_class.CheckParameterSyntax(hv_params)
+      except errors.GenericError, err:
+        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
+
+
   def BuildHooksEnv(self):
     """Build hooks env.
 
@@ -2094,12 +2116,32 @@ class LUClusterVerify(LogicalUnit):
 
     local_checksums = utils.FingerprintFiles(file_names)
 
+    # Compute the set of hypervisor parameters
+    hvp_data = []
+    for hv_name in hypervisors:
+      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
+    for os_name, os_hvp in cluster.os_hvp.items():
+      for hv_name, hv_params in os_hvp.items():
+        if not hv_params:
+          continue
+        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
+        hvp_data.append(("os %s" % os_name, hv_name, full_params))
+    # TODO: collapse identical parameter values in a single one
+    for instance in instanceinfo.values():
+      if not instance.hvparams:
+        continue
+      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
+                       cluster.FillHV(instance)))
+    # and verify them locally
+    self._VerifyHVP(hvp_data)
+
     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
     node_verify_param = {
       constants.NV_FILELIST: file_names,
       constants.NV_NODELIST: [node.name for node in nodeinfo
                               if not node.offline],
       constants.NV_HYPERVISOR: hypervisors,
+      constants.NV_HVPARAMS: hvp_data,
       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                   node.secondary_ip) for node in nodeinfo
                                  if not node.offline],
@@ -2405,15 +2447,13 @@ class LUClusterVerifyDisks(NoHooksLU):
 
     """
     result = res_nodes, res_instances, res_missing = {}, [], {}
-    nodes = utils.NiceSort(self.cfg.GetNodeList())
-    instances = [self.cfg.GetInstanceInfo(name)
-                 for name in self.cfg.GetInstanceList()]
+    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
+    instances = self.cfg.GetAllInstancesInfo().values()
 
     nv_dict = {}
     for inst in instances:
       inst_lvs = {}
-      if (not inst.admin_up or
-          inst.disk_template not in constants.DTS_NET_MIRROR):
+      if not inst.admin_up:
         continue
       inst.MapLVsByNode(inst_lvs)
       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
@@ -2424,14 +2464,8 @@ class LUClusterVerifyDisks(NoHooksLU):
     if not nv_dict:
       return result
 
-    vg_names = self.rpc.call_vg_list(nodes)
-    for node in nodes:
-      vg_names[node].Raise("Cannot get list of VGs")
-
-    for node in nodes:
-      # node_volume
-      node_res = self.rpc.call_lv_list([node],
-                                       vg_names[node].payload.keys())[node]
+    node_lvs = self.rpc.call_lv_list(nodes, [])
+    for node, node_res in node_lvs.items():
       if node_res.offline:
         continue
       msg = node_res.fail_msg
@@ -3192,31 +3226,33 @@ class LUOobCommand(NoHooksLU):
     Any errors are signaled by raising errors.OpPrereqError.
 
     """
-    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
-    node = self.cfg.GetNodeInfo(self.op.node_name)
-
-    if node is None:
-      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)
-
-    self.oob_program = _SupportsOob(self.cfg, node)
+    self.nodes = []
+    for node_name in self.op.node_names:
+      node = self.cfg.GetNodeInfo(node_name)
 
-    if not self.oob_program:
-      raise errors.OpPrereqError("OOB is not supported for node %s" %
-                                 self.op.node_name)
-
-    if self.op.command == constants.OOB_POWER_OFF and not node.offline:
-      raise errors.OpPrereqError(("Cannot power off node %s because it is"
-                                  " not marked offline") % self.op.node_name)
+      if node is None:
+        raise errors.OpPrereqError("Node %s not found" % node_name,
+                                   errors.ECODE_NOENT)
+      else:
+        self.nodes.append(node)
 
-    self.node = node
+      if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
+        raise errors.OpPrereqError(("Cannot power off node %s because it is"
+                                    " not marked offline") % node_name,
+                                   errors.ECODE_STATE)
 
   def ExpandNames(self):
     """Gather locks we need.
 
     """
-    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+    if self.op.node_names:
+      self.op.node_names = [_ExpandNodeName(self.cfg, name)
+                            for name in self.op.node_names]
+    else:
+      self.op.node_names = self.cfg.GetNodeList()
+
     self.needed_locks = {
-      locking.LEVEL_NODE: [node_name],
+      locking.LEVEL_NODE: self.op.node_names,
       }
 
   def Exec(self, feedback_fn):
@@ -3224,40 +3260,63 @@ class LUOobCommand(NoHooksLU):
 
     """
     master_node = self.cfg.GetMasterNode()
-    node = self.node
-
-    logging.info("Executing out-of-band command '%s' using '%s' on %s",
-                 self.op.command, self.oob_program, self.op.node_name)
-    result = self.rpc.call_run_oob(master_node, self.oob_program,
-                                   self.op.command, self.op.node_name,
-                                   self.op.timeout)
+    ret = []
 
-    result.Raise("An error occurred on execution of OOB helper")
+    for node in self.nodes:
+      node_entry = [(constants.RS_NORMAL, node.name)]
+      ret.append(node_entry)
 
-    self._CheckPayload(result)
+      oob_program = _SupportsOob(self.cfg, node)
 
-    if self.op.command == constants.OOB_HEALTH:
-      # For health we should log important events
-      for item, status in result.payload:
-        if status in [constants.OOB_STATUS_WARNING,
-                      constants.OOB_STATUS_CRITICAL]:
-          logging.warning("On node '%s' item '%s' has status '%s'",
-                          self.op.node_name, item, status)
-
-    if self.op.command == constants.OOB_POWER_ON:
-      node.powered = True
-    elif self.op.command == constants.OOB_POWER_OFF:
-      node.powered = False
-    elif self.op.command == constants.OOB_POWER_STATUS:
-      powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
-      if powered != self.node.powered:
-        logging.warning(("Recorded power state (%s) of node '%s' does not match"
-                         " actual power state (%s)"), node.powered,
-                        self.op.node_name, powered)
+      if not oob_program:
+        node_entry.append((constants.RS_UNAVAIL, None))
+        continue
 
-    self.cfg.Update(node, feedback_fn)
+      logging.info("Executing out-of-band command '%s' using '%s' on %s",
+                   self.op.command, oob_program, node.name)
+      result = self.rpc.call_run_oob(master_node, oob_program,
+                                     self.op.command, node.name,
+                                     self.op.timeout)
 
-    return result.payload
+      if result.fail_msg:
+        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
+                        node.name, result.fail_msg)
+        node_entry.append((constants.RS_NODATA, None))
+      else:
+        try:
+          self._CheckPayload(result)
+        except errors.OpExecError, err:
+          self.LogWarning("The payload returned by '%s' is not valid: %s",
+                          node.name, err)
+          node_entry.append((constants.RS_NODATA, None))
+        else:
+          if self.op.command == constants.OOB_HEALTH:
+            # For health we should log important events
+            for item, status in result.payload:
+              if status in [constants.OOB_STATUS_WARNING,
+                            constants.OOB_STATUS_CRITICAL]:
+                self.LogWarning("On node '%s' item '%s' has status '%s'",
+                                node.name, item, status)
+
+          if self.op.command == constants.OOB_POWER_ON:
+            node.powered = True
+          elif self.op.command == constants.OOB_POWER_OFF:
+            node.powered = False
+          elif self.op.command == constants.OOB_POWER_STATUS:
+            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
+            if powered != node.powered:
+              logging.warning(("Recorded power state (%s) of node '%s' does not"
+                               " match actual power state (%s)"), node.powered,
+                              node.name, powered)
+
+          # For configuration changing commands we should update the node
+          if self.op.command in (constants.OOB_POWER_ON,
+                                 constants.OOB_POWER_OFF):
+            self.cfg.Update(node, feedback_fn)
+
+          node_entry.append((constants.RS_NORMAL, result.payload))
+
+    return ret
 
   def _CheckPayload(self, result):
    """Checks if the payload is valid.
@@ -3271,10 +3330,11 @@ class LUOobCommand(NoHooksLU):
       if not isinstance(result.payload, list):
         errs.append("command 'health' is expected to return a list but got %s" %
                     type(result.payload))
-      for item, status in result.payload:
-        if status not in constants.OOB_STATUSES:
-          errs.append("health item '%s' has invalid status '%s'" %
-                      (item, status))
+      else:
+        for item, status in result.payload:
+          if status not in constants.OOB_STATUSES:
+            errs.append("health item '%s' has invalid status '%s'" %
+                        (item, status))
 
     if self.op.command == constants.OOB_POWER_STATUS:
       if not isinstance(result.payload, dict):
@@ -3369,7 +3429,9 @@ class LUOsDiagnose(NoHooksLU):
     """Compute the list of OSes.
 
     """
-    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
+    valid_nodes = [node.name
+                   for node in self.cfg.GetAllNodesInfo().values()
+                   if not node.offline and node.vm_capable]
     node_data = self.rpc.call_os_diagnose(valid_nodes)
     pol = self._DiagnoseByOS(node_data)
     output = []
@@ -3806,18 +3868,21 @@ class _InstanceQuery(_QueryBase):
     """Computes the list of instances and their attributes.
 
     """
+    cluster = lu.cfg.GetClusterInfo()
     all_info = lu.cfg.GetAllInstancesInfo()
     instance_names = self._GetNames(lu, all_info.keys(),
                                     locking.LEVEL_INSTANCE)
 
     instance_list = [all_info[name] for name in instance_names]
-    nodes = frozenset([inst.primary_node for inst in instance_list])
+    nodes = frozenset(itertools.chain(*(inst.all_nodes
+                                        for inst in instance_list)))
     hv_list = list(set([inst.hypervisor for inst in instance_list]))
     bad_nodes = []
     offline_nodes = []
+    wrongnode_inst = set()
 
     # Gather data as requested
-    if query.IQ_LIVE in self.requested_data:
+    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
       live_data = {}
       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
       for name in nodes:
@@ -3829,7 +3894,11 @@ class _InstanceQuery(_QueryBase):
         if result.fail_msg:
           bad_nodes.append(name)
         elif result.payload:
-          live_data.update(result.payload)
+          for inst in result.payload:
+            if all_info[inst].primary_node == name:
+              live_data.update(result.payload)
+            else:
+              wrongnode_inst.add(inst)
         # else no instance is alive
     else:
       live_data = {}
@@ -3843,9 +3912,21 @@ class _InstanceQuery(_QueryBase):
     else:
       disk_usage = None
 
+    if query.IQ_CONSOLE in self.requested_data:
+      consinfo = {}
+      for inst in instance_list:
+        if inst.name in live_data:
+          # Instance is running
+          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
+        else:
+          consinfo[inst.name] = None
+      assert set(consinfo.keys()) == set(instance_names)
+    else:
+      consinfo = None
+
     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                    disk_usage, offline_nodes, bad_nodes,
-                                   live_data)
+                                   live_data, wrongnode_inst, consinfo)
 
 
 class LUQuery(NoHooksLU):
@@ -4319,15 +4400,15 @@ class LUNodeSetParams(LogicalUnit):
                                  errors.ECODE_STATE)
 
     if node.master_candidate and self.might_demote and not self.lock_all:
-      assert not self.op.auto_promote, "auto-promote set but lock_all not"
+      assert not self.op.auto_promote, "auto_promote set but lock_all not"
       # check if after removing the current node, we're missing master
       # candidates
       (mc_remaining, mc_should, _) = \
           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
       if mc_remaining < mc_should:
         raise errors.OpPrereqError("Not enough master candidates, please"
-                                   " pass auto_promote to allow promotion",
-                                   errors.ECODE_STATE)
+                                   " pass auto promote option to allow"
+                                   " promotion", errors.ECODE_STATE)
 
     self.old_flags = old_flags = (node.master_candidate,
                                   node.drained, node.offline)
@@ -4578,6 +4659,8 @@ class LUClusterQuery(NoHooksLU):
       "reserved_lvs": cluster.reserved_lvs,
       "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
+      "hidden_os": cluster.hidden_os,
+      "blacklisted_os": cluster.blacklisted_os,
       }
 
     return result
@@ -4700,13 +4783,13 @@ def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
   # SyncSource, etc.)
 
   # 1st pass, assemble on all nodes in secondary mode
-  for inst_disk in disks:
+  for idx, inst_disk in enumerate(disks):
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
       if ignore_size:
         node_disk = node_disk.Copy()
         node_disk.UnsetSize()
       lu.cfg.SetDiskID(node_disk, node)
-      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
+      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
       msg = result.fail_msg
       if msg:
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
@@ -4718,7 +4801,7 @@ def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
   # FIXME: race condition on drbd migration to primary
 
   # 2nd pass, do only the primary node
-  for inst_disk in disks:
+  for idx, inst_disk in enumerate(disks):
     dev_path = None
 
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
@@ -4728,7 +4811,7 @@ def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
         node_disk = node_disk.Copy()
         node_disk.UnsetSize()
       lu.cfg.SetDiskID(node_disk, node)
-      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
+      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
       msg = result.fail_msg
       if msg:
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
@@ -4794,7 +4877,10 @@ class LUInstanceDeactivateDisks(NoHooksLU):
 
     """
     instance = self.instance
-    _SafeShutdownInstanceDisks(self, instance)
+    if self.op.force:
+      _ShutdownInstanceDisks(self, instance)
+    else:
+      _SafeShutdownInstanceDisks(self, instance)
 
 
 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
@@ -5863,7 +5949,7 @@ class LUInstanceMove(LogicalUnit):
     for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
       result = self.rpc.call_blockdev_assemble(target_node, disk,
-                                               instance.name, True)
+                                               instance.name, True, idx)
       if result.fail_msg:
         self.LogWarning("Can't assemble newly created disk %d: %s",
                         idx, result.fail_msg)
@@ -7731,18 +7817,28 @@ class LUInstanceConsole(NoHooksLU):
 
     logging.debug("Connecting to console of %s on %s", instance.name, node)
 
-    hyper = hypervisor.GetHypervisor(instance.hypervisor)
-    cluster = self.cfg.GetClusterInfo()
-    # beparams and hvparams are passed separately, to avoid editing the
-    # instance and then saving the defaults in the instance itself.
-    hvparams = cluster.FillHV(instance)
-    beparams = cluster.FillBE(instance)
-    console = hyper.GetInstanceConsole(instance, hvparams, beparams)
+    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
+
+
+def _GetInstanceConsole(cluster, instance):
+  """Returns console information for an instance.
+
+  @type cluster: L{objects.Cluster}
+  @type instance: L{objects.Instance}
+  @rtype: dict
+
+  """
+  hyper = hypervisor.GetHypervisor(instance.hypervisor)
+  # beparams and hvparams are passed separately, to avoid editing the
+  # instance and then saving the defaults in the instance itself.
+  hvparams = cluster.FillHV(instance)
+  beparams = cluster.FillBE(instance)
+  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
 
-    assert console.instance == instance.name
-    assert console.Validate()
+  assert console.instance == instance.name
+  assert console.Validate()
 
-    return console.ToDict()
+  return console.ToDict()
 
 
 class LUInstanceReplaceDisks(LogicalUnit):
@@ -10286,9 +10382,9 @@ class LUGroupRemove(LogicalUnit):
 
     # Verify the cluster would not be left group-less.
     if len(self.cfg.GetNodeGroupList()) == 1:
-      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
-                                 " which cannot be left without at least one"
-                                 " group" % self.op.group_name,
+      raise errors.OpPrereqError("Group '%s' is the only group,"
+                                 " cannot be removed" %
+                                 self.op.group_name,
                                  errors.ECODE_STATE)
 
   def BuildHooksEnv(self):