X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/2ee88aeb76a2430ec0c7f86629bf66cfd0b6f564..83d92ad8a5c653b599fec0f9cb550875012aef25:/lib/cmdlib.py

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 16b4ec9..ea36327 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -501,12 +501,15 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   if nics:
     nic_count = len(nics)
-    for idx, (ip, bridge, mac) in enumerate(nics):
+    for idx, (ip, mac, mode, link) in enumerate(nics):
       if ip is None:
         ip = ""
       env["INSTANCE_NIC%d_IP" % idx] = ip
-      env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
       env["INSTANCE_NIC%d_MAC" % idx] = mac
+      env["INSTANCE_NIC%d_MODE" % idx] = mode
+      env["INSTANCE_NIC%d_LINK" % idx] = link
+      if mode == constants.NIC_MODE_BRIDGED:
+        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
   else:
     nic_count = 0

@@ -524,6 +527,27 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   return env


+def _PreBuildNICHooksList(lu, nics):
+  """Build a list of nic information tuples.
+
+  This list is suitable to be passed to _BuildInstanceHookEnv.
+
+  @type lu: L{LogicalUnit}
+  @param lu: the logical unit on whose behalf we execute
+  @type nics: list of L{objects.NIC}
+  @param nics: list of nics to convert to hooks tuples
+
+  """
+  hooks_nics = []
+  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
+  for nic in nics:
+    ip = nic.ip
+    mac = nic.mac
+    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
+    mode = filled_params[constants.NIC_MODE]
+    link = filled_params[constants.NIC_LINK]
+    hooks_nics.append((ip, mac, mode, link))
+  return hooks_nics
+

 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
   """Builds instance related env variables for hooks from an object.
@@ -549,7 +573,7 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
     'status': instance.admin_up,
     'memory': bep[constants.BE_MEMORY],
     'vcpus': bep[constants.BE_VCPUS],
-    'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
+    'nics': _PreBuildNICHooksList(lu, instance.nics),
     'disk_template': instance.disk_template,
     'disks': [(disk.size, disk.mode) for disk in instance.disks],
   }
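Illustration (not part of the patch): with the hunks above, each NIC hook tuple becomes (ip, mac, mode, link) and the per-NIC environment gains MODE/LINK entries, with BRIDGE kept only for bridged NICs. Schematically, for one bridged NIC (all concrete values invented):

    # sketch only; key names follow _BuildInstanceHookEnv above
    env = {
      "INSTANCE_NIC_COUNT": "1",
      "INSTANCE_NIC0_IP": "",                  # empty string when ip is None
      "INSTANCE_NIC0_MAC": "aa:00:00:fa:39:71",
      "INSTANCE_NIC0_MODE": "bridged",         # constants.NIC_MODE_BRIDGED
      "INSTANCE_NIC0_LINK": "xen-br0",
      "INSTANCE_NIC0_BRIDGE": "xen-br0",       # emitted only in bridged mode
    }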
""" - # check bridges existance - brlist = [nic.bridge for nic in instance.nics] - result = lu.rpc.call_bridges_exist(instance.primary_node, brlist) - result.Raise() - if not result.data: - raise errors.OpPrereqError("One or more target bridges %s does not" - " exist on destination node '%s'" % - (brlist, instance.primary_node)) + if node is None: + node=instance.primary_node + _CheckNicsBridgesExist(lu, instance.nics, node) class LUDestroyCluster(NoHooksLU): @@ -619,9 +656,9 @@ class LUDestroyCluster(NoHooksLU): """ master = self.cfg.GetMasterNode() result = self.rpc.call_node_stop_master(master, False) - result.Raise() - if not result.data: - raise errors.OpExecError("Could not disable the master role") + msg = result.RemoteFailMsg() + if msg: + raise errors.OpExecError("Could not disable the master role: %s" % msg) priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS) utils.CreateBackup(priv_key) utils.CreateBackup(pub_key) @@ -985,7 +1022,6 @@ class LUVerifyCluster(LogicalUnit): for node_i in nodeinfo: node = node_i.name - nresult = all_nvinfo[node].data if node_i.offline: feedback_fn("* Skipping offline node %s" % (node,)) @@ -1003,11 +1039,13 @@ class LUVerifyCluster(LogicalUnit): ntype = "regular" feedback_fn("* Verifying node %s (%s)" % (node, ntype)) - if all_nvinfo[node].failed or not isinstance(nresult, dict): - feedback_fn(" - ERROR: connection to %s failed" % (node,)) + msg = all_nvinfo[node].RemoteFailMsg() + if msg: + feedback_fn(" - ERROR: while contacting node %s: %s" % (node, msg)) bad = True continue + nresult = all_nvinfo[node].payload node_drbd = {} for minor, instance in all_drbd_map[node].items(): if instance not in instanceinfo: @@ -1251,8 +1289,13 @@ class LUVerifyDisks(NoHooksLU): def Exec(self, feedback_fn): """Verify integrity of cluster disks. 
@@ -1251,8 +1289,13 @@ class LUVerifyDisks(NoHooksLU):
   def Exec(self, feedback_fn):
     """Verify integrity of cluster disks.

+    @rtype: tuple of three items
+    @return: a tuple of (dict of node-to-node_error, list of instances
+        which need activate-disks, dict of instance: (node, volume) for
+        missing volumes)
+
     """
-    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}
+    result = res_nodes, res_instances, res_missing = {}, [], {}

     vg_name = self.cfg.GetVGName()
     nodes = utils.NiceSort(self.cfg.GetNodeList())
@@ -1279,24 +1322,17 @@ class LUVerifyDisks(NoHooksLU):
     to_act = set()
     for node in nodes:
       # node_volume
-      lvs = node_lvs[node]
-      if lvs.failed:
-        if not lvs.offline:
-          self.LogWarning("Connection to node %s failed: %s" %
-                          (node, lvs.data))
+      node_res = node_lvs[node]
+      if node_res.offline:
         continue
-      lvs = lvs.data
-      if isinstance(lvs, basestring):
-        logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
-        res_nlvm[node] = lvs
-        continue
-      elif not isinstance(lvs, dict):
-        logging.warning("Connection to node %s failed or invalid data"
-                        " returned", node)
-        res_nodes.append(node)
+      msg = node_res.RemoteFailMsg()
+      if msg:
+        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
+        res_nodes[node] = msg
         continue

-      for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
+      lvs = node_res.payload
+      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
         inst = nv_dict.pop((node, lv_name), None)
         if (not lv_online and inst is not None
             and inst.name not in res_instances):
@@ -1362,8 +1398,9 @@ class LURenameCluster(LogicalUnit):
     # shutdown the master IP
     master = self.cfg.GetMasterNode()
     result = self.rpc.call_node_stop_master(master, False)
-    if result.failed or not result.data:
-      raise errors.OpExecError("Could not disable the master role")
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Could not disable the master role: %s" % msg)

     try:
       cluster = self.cfg.GetClusterInfo()
@@ -1389,9 +1426,10 @@ class LURenameCluster(LogicalUnit):
     finally:
       result = self.rpc.call_node_start_master(master, False)
-      if result.failed or not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
         self.LogWarning("Could not re-enable the master role on"
-                        " the master, please restart manually.")
+                        " the master, please restart manually: %s", msg)


 def _RecursiveCheckIfLVMBased(disk):
@@ -1474,11 +1512,13 @@ class LUSetClusterParams(LogicalUnit):
     if self.op.vg_name:
       vglist = self.rpc.call_vg_list(node_list)
       for node in node_list:
-        if vglist[node].failed:
+        msg = vglist[node].RemoteFailMsg()
+        if msg:
           # ignoring down node
-          self.LogWarning("Node %s unreachable/error, ignoring" % node)
+          self.LogWarning("Error while gathering data on node %s"
+                          " (ignoring node): %s", node, msg)
           continue
-        vgstatus = utils.CheckVolumeGroupSize(vglist[node].data,
+        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                               self.op.vg_name,
                                               constants.MIN_VG_SIZE)
         if vgstatus:
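Illustration (not part of the patch): per the @return documented in the LUVerifyDisks hunk above, callers now unpack a three-element tuple instead of the old four-element one; with hypothetical values:

    # sketch only; names and values invented
    node_errors, offline_instances, missing_volumes = result
    # node_errors:       {"node1": "error enumerating LVs ..."}
    # offline_instances: ["instance3"]          # need activate-disks
    # missing_volumes:   {"instance3": ("node1", "xenvg/disk0")}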
@@ -1650,15 +1690,16 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False):
     done = True
     cumul_degraded = False
     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
-    if rstats.failed or not rstats.data:
-      lu.LogWarning("Can't get any data from node %s", node)
+    msg = rstats.RemoteFailMsg()
+    if msg:
+      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
       retries += 1
       if retries >= 10:
         raise errors.RemoteError("Can't contact node %s for mirror data,"
                                  " aborting." % node)
       time.sleep(6)
       continue
-    rstats = rstats.data
+    rstats = rstats.payload
     retries = 0
     for i, mstat in enumerate(rstats):
       if mstat is None:
@@ -1770,11 +1811,12 @@ class LUDiagnoseOS(NoHooksLU):
     # level), so that nodes with a non-responding node daemon don't
     # make all OSes invalid
     good_nodes = [node_name for node_name in rlist
-                  if not rlist[node_name].failed]
-    for node_name, nr in rlist.iteritems():
-      if nr.failed or not nr.data:
+                  if not rlist[node_name].RemoteFailMsg()]
+    for node_name, nr in rlist.items():
+      if nr.RemoteFailMsg() or not nr.payload:
         continue
-      for os_obj in nr.data:
+      for os_serialized in nr.payload:
+        os_obj = objects.OS.FromDict(os_serialized)
         if os_obj.name not in all_os:
           # build a list of nodes for this os containing empty lists
           # for each node in node_list
@@ -1790,11 +1832,9 @@ class LUDiagnoseOS(NoHooksLU):
     """
     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
     node_data = self.rpc.call_os_diagnose(valid_nodes)
-    if node_data == False:
-      raise errors.OpExecError("Can't gather the list of OSes")
     pol = self._DiagnoseByOS(valid_nodes, node_data)
     output = []
-    for os_name, os_data in pol.iteritems():
+    for os_name, os_data in pol.items():
       row = []
       for field in self.op.output_fields:
         if field == "name":
@@ -1876,7 +1916,11 @@ class LURemoveNode(LogicalUnit):

     self.context.RemoveNode(node.name)

-    self.rpc.call_node_leave_cluster(node.name)
+    result = self.rpc.call_node_leave_cluster(node.name)
+    msg = result.RemoteFailMsg()
+    if msg:
+      self.LogWarning("Errors encountered on the remote node while leaving"
+                      " the cluster: %s", msg)

     # Promote nodes to master candidate as needed
     _AdjustCandidatePool(self)
@@ -1961,8 +2005,8 @@ class LUQueryNodes(NoHooksLU):
                                           self.cfg.GetHypervisorType())
       for name in nodenames:
         nodeinfo = node_data[name]
-        if not nodeinfo.failed and nodeinfo.data:
-          nodeinfo = nodeinfo.data
+        if not nodeinfo.RemoteFailMsg() and nodeinfo.payload:
+          nodeinfo = nodeinfo.payload
           fn = utils.TryConvert
           live_data[name] = {
             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
@@ -2084,10 +2128,15 @@ class LUQueryNodeVolumes(NoHooksLU):
     output = []

     for node in nodenames:
-      if node not in volumes or volumes[node].failed or not volumes[node].data:
+      nresult = volumes[node]
+      if nresult.offline:
+        continue
+      msg = nresult.RemoteFailMsg()
+      if msg:
+        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
         continue

-      node_vols = volumes[node].data[:]
+      node_vols = nresult.payload[:]
       node_vols.sort(key=lambda vol: vol['dev'])

       for vol in node_vols:
@@ -2237,17 +2286,17 @@ class LUAddNode(LogicalUnit):

     # check connectivity
     result = self.rpc.call_version([node])[node]
-    result.Raise()
-    if result.data:
-      if constants.PROTOCOL_VERSION == result.data:
-        logging.info("Communication to node %s fine, sw version %s match",
-                     node, result.data)
-      else:
-        raise errors.OpExecError("Version mismatch master version %s,"
-                                 " node version %s" %
-                                 (constants.PROTOCOL_VERSION, result.data))
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Can't get version information from"
+                               " node %s: %s" % (node, msg))
+    if constants.PROTOCOL_VERSION == result.payload:
+      logging.info("Communication to node %s fine, sw version %s match",
+                   node, result.payload)
     else:
-      raise errors.OpExecError("Cannot get version from the new node")
+      raise errors.OpExecError("Version mismatch master version %s,"
+                               " node version %s" %
+                               (constants.PROTOCOL_VERSION, result.payload))

     # setup ssh on node
     logging.info("Copy ssh key to node %s", node)
@@ -2280,7 +2329,11 @@ class LUAddNode(LogicalUnit):
     if new_node.secondary_ip != new_node.primary_ip:
       result = self.rpc.call_node_has_ip_address(new_node.name,
                                                  new_node.secondary_ip)
-      if result.failed or not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpPrereqError("Failure checking secondary ip"
+                                   " on node %s: %s" % (new_node.name, msg))
+      if not result.payload:
         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                  " you gave (%s). Please fix and re-run this"
                                  " command." % new_node.secondary_ip)
@@ -2294,13 +2347,15 @@ class LUAddNode(LogicalUnit):
     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                        self.cfg.GetClusterName())
     for verifier in node_verify_list:
-      if result[verifier].failed or not result[verifier].data:
-        raise errors.OpExecError("Cannot communicate with %s's node daemon"
-                                 " for remote verification" % verifier)
-      if result[verifier].data['nodelist']:
-        for failed in result[verifier].data['nodelist']:
+      msg = result[verifier].RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Cannot communicate with node %s: %s" %
+                                 (verifier, msg))
+      nl_payload = result[verifier].payload['nodelist']
+      if nl_payload:
+        for failed in nl_payload:
           feedback_fn("ssh/hostname verification failed %s -> %s" %
-                      (verifier, result[verifier].data['nodelist'][failed]))
+                      (verifier, nl_payload[failed]))
         raise errors.OpExecError("ssh/hostname verification failed.")

     if self.op.readd:
@@ -2520,7 +2575,6 @@ class LUQueryClusterInfo(NoHooksLU):
       "beparams": cluster.beparams,
       "nicparams": cluster.nicparams,
       "candidate_pool_size": cluster.candidate_pool_size,
-      "default_bridge": cluster.default_bridge,
       "master_netdev": cluster.master_netdev,
       "volume_group_name": cluster.volume_group_name,
       "file_storage_dir": cluster.file_storage_dir,
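Illustration (not part of the patch): "default_bridge" drops out of the cluster info dict above because the cluster-wide default is now carried by cluster.nicparams, which FillDict overlays with each NIC's own settings; a sketch with invented values:

    # sketch only; FillDict(defaults, overrides) semantics as used throughout
    c_nicparams = {constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
                   constants.NIC_LINK: "xen-br0"}     # cluster defaults
    nic_nicparams = {constants.NIC_LINK: "xen-br1"}   # per-NIC override
    filled = objects.FillDict(c_nicparams, nic_nicparams)
    # filled == {mode: "bridged", link: "xen-br1"}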
""" - ins_l = lu.rpc.call_instance_list([instance.primary_node], - [instance.hypervisor]) - ins_l = ins_l[instance.primary_node] - if ins_l.failed or not isinstance(ins_l.data, list): - raise errors.OpExecError("Can't contact node '%s'" % - instance.primary_node) - - if instance.name in ins_l.data: + pnode = instance.primary_node + ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor]) + ins_l = ins_l[pnode] + msg = ins_l.RemoteFailMsg() + if msg: + raise errors.OpExecError("Can't contact node %s: %s" % (pnode, msg)) + + if instance.name in ins_l.payload: raise errors.OpExecError("Instance is running, can't shutdown" " block devices.") @@ -2792,15 +2846,17 @@ def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name): """ nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name) - nodeinfo[node].Raise() - free_mem = nodeinfo[node].data.get('memory_free') + msg = nodeinfo[node].RemoteFailMsg() + if msg: + raise errors.OpPrereqError("Can't get data from node %s: %s" % (node, msg)) + free_mem = nodeinfo[node].payload.get('memory_free', None) if not isinstance(free_mem, int): raise errors.OpPrereqError("Can't compute free memory on node %s, result" - " was '%s'" % (node, free_mem)) + " was '%s'" % (node, free_mem)) if requested > free_mem: raise errors.OpPrereqError("Not enough memory on node %s for %s:" - " needed %s MiB, available %s MiB" % - (node, reason, requested, free_mem)) + " needed %s MiB, available %s MiB" % + (node, reason, requested, free_mem)) class LUStartupInstance(LogicalUnit): @@ -2875,8 +2931,11 @@ class LUStartupInstance(LogicalUnit): remote_info = self.rpc.call_instance_info(instance.primary_node, instance.name, instance.hypervisor) - remote_info.Raise() - if not remote_info.data: + msg = remote_info.RemoteFailMsg() + if msg: + raise errors.OpPrereqError("Error checking node %s: %s" % + (instance.primary_node, msg)) + if not remote_info.payload: # not running already _CheckNodeFreeMemory(self, instance.primary_node, "starting instance %s" % instance.name, bep[constants.BE_MEMORY], instance.hypervisor) @@ -3077,8 +3136,11 @@ class LUReinstallInstance(LogicalUnit): remote_info = self.rpc.call_instance_info(instance.primary_node, instance.name, instance.hypervisor) - remote_info.Raise() - if remote_info.data: + msg = remote_info.RemoteFailMsg() + if msg: + raise errors.OpPrereqError("Error checking node %s: %s" % + (instance.primary_node, msg)) + if remote_info.payload: raise errors.OpPrereqError("Instance '%s' is running on the node %s" % (self.op.instance_name, instance.primary_node)) @@ -3161,8 +3223,11 @@ class LURenameInstance(LogicalUnit): remote_info = self.rpc.call_instance_info(instance.primary_node, instance.name, instance.hypervisor) - remote_info.Raise() - if remote_info.data: + msg = remote_info.RemoteFailMsg() + if msg: + raise errors.OpPrereqError("Error checking node %s: %s" % + (instance.primary_node, msg)) + if remote_info.payload: raise errors.OpPrereqError("Instance '%s' is running on the node %s" % (self.op.instance_name, instance.primary_node)) @@ -3400,12 +3465,12 @@ class LUQueryInstances(NoHooksLU): if result.offline: # offline nodes will be in both lists off_nodes.append(name) - if result.failed: + if result.failed or result.RemoteFailMsg(): bad_nodes.append(name) else: - if result.data: - live_data.update(result.data) - # else no instance is alive + if result.payload: + live_data.update(result.payload) + # else no instance is alive else: live_data = dict([(name, {}) for name in instance_names]) @@ -3600,15 
@@ -3600,15 +3665,8 @@ class LUFailoverInstance(LogicalUnit):
     _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                          instance.name, bep[constants.BE_MEMORY],
                          instance.hypervisor)
-    # check bridge existance
-    brlist = [nic.bridge for nic in instance.nics]
-    result = self.rpc.call_bridges_exist(target_node, brlist)
-    result.Raise()
-    if not result.data:
-      raise errors.OpPrereqError("One or more target bridges %s does not"
-                                 " exist on destination node '%s'" %
-                                 (brlist, target_node))
+    _CheckInstanceBridgesExist(self, instance, node=target_node)

   def Exec(self, feedback_fn):
     """Failover an instance.
@@ -3740,12 +3798,7 @@ class LUMigrateInstance(LogicalUnit):
                          instance.hypervisor)

     # check bridge existence
-    brlist = [nic.bridge for nic in instance.nics]
-    result = self.rpc.call_bridges_exist(target_node, brlist)
-    if result.failed or not result.data:
-      raise errors.OpPrereqError("One or more target bridges %s does not"
-                                 " exist on destination node '%s'" %
-                                 (brlist, target_node))
+    _CheckInstanceBridgesExist(self, instance, node=target_node)

     if not self.op.cleanup:
       _CheckNodeNotDrained(self, target_node)
@@ -3856,12 +3909,12 @@ class LUMigrateInstance(LogicalUnit):
                        " a bad state)")
     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
     for node, result in ins_l.items():
-      result.Raise()
-      if not isinstance(result.data, list):
-        raise errors.OpExecError("Can't contact node '%s'" % node)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Can't contact node %s: %s" % (node, msg))

-    runningon_source = instance.name in ins_l[source_node].data
-    runningon_target = instance.name in ins_l[target_node].data
+    runningon_source = instance.name in ins_l[source_node].payload
+    runningon_target = instance.name in ins_l[target_node].payload

     if runningon_source and runningon_target:
       raise errors.OpExecError("Instance seems to be running on two nodes,"
@@ -4622,7 +4675,7 @@ class LUCreateInstance(LogicalUnit):
       os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
-      nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
+      nics=_PreBuildNICHooksList(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
    ))
@@ -4646,11 +4699,13 @@ class LUCreateInstance(LogicalUnit):
     src_path = self.op.src_path

     if src_node is None:
-      exp_list = self.rpc.call_export_list(
-        self.acquired_locks[locking.LEVEL_NODE])
+      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
+      exp_list = self.rpc.call_export_list(locked_nodes)
       found = False
       for node in exp_list:
-        if not exp_list[node].failed and src_path in exp_list[node].data:
+        if exp_list[node].RemoteFailMsg():
+          continue
+        if src_path in exp_list[node].payload:
           found = True
           self.op.src_node = src_node = node
           self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
@@ -4662,11 +4717,12 @@ class LUCreateInstance(LogicalUnit):
       _CheckNodeOnline(self, src_node)

     result = self.rpc.call_export_info(src_node, src_path)
-    result.Raise()
-    if not result.data:
-      raise errors.OpPrereqError("No export found in dir %s" % src_path)
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpPrereqError("No export or invalid export found in"
+                                 " dir %s: %s" % (src_path, msg))

-    export_info = result.data
+    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
     if not export_info.has_section(constants.INISECT_EXP):
       raise errors.ProgrammerError("Corrupted export config")
@@ -4772,19 +4828,19 @@ class LUCreateInstance(LogicalUnit):
                                          self.op.hypervisor)
       for node in nodenames:
         info = nodeinfo[node]
-        info.Raise()
-        info = info.data
-        if not info:
+        msg = info.RemoteFailMsg()
+        if msg:
           raise errors.OpPrereqError("Cannot get current information"
-                                     " from node '%s'" % node)
+                                     " from node %s: %s" % (node, msg))
+        info = info.payload
         vg_free = info.get('vg_free', None)
         if not isinstance(vg_free, int):
           raise errors.OpPrereqError("Can't compute free disk space on"
                                      " node %s" % node)
-        if req_size > info['vg_free']:
+        if req_size > vg_free:
           raise errors.OpPrereqError("Not enough disk space on target node %s."
                                      " %d MB available, %d MB required" %
-                                     (node, info['vg_free'], req_size))
+                                     (node, vg_free, req_size))

     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

@@ -4795,14 +4851,7 @@ class LUCreateInstance(LogicalUnit):
       raise errors.OpPrereqError("OS '%s' not in supported os list for"
                                  " primary node" % self.op.os_type)

-    # bridge check on primary node
-    bridges = [n.bridge for n in self.nics]
-    result = self.rpc.call_bridges_exist(self.pnode.name, bridges)
-    result.Raise()
-    if not result.data:
-      raise errors.OpPrereqError("One of the target bridges '%s' does not"
-                                 " exist on destination node '%s'" %
-                                 (",".join(bridges), pnode.name))
+    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

     # memory check on primary node
     if self.op.start:
@@ -4926,12 +4975,10 @@ class LUCreateInstance(LogicalUnit):
         import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                          src_node, src_images,
                                                          cluster_name)
-        import_result.Raise()
-        for idx, result in enumerate(import_result.data):
-          if not result:
-            self.LogWarning("Could not import the image %s for instance"
-                            " %s, disk %d, on node %s" %
-                            (src_images[idx], instance, idx, pnode_name))
+        msg = import_result.RemoteFailMsg()
+        if msg:
+          self.LogWarning("Error while importing the disk images for instance"
+                          " %s on node %s: %s" % (instance, pnode_name, msg))
       else:
         # also checked in the prereq part
         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
@@ -4982,9 +5029,12 @@ class LUConnectConsole(NoHooksLU):

     node_insts = self.rpc.call_instance_list([node],
                                              [instance.hypervisor])[node]
-    node_insts.Raise()
+    msg = node_insts.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Can't get node information from %s: %s" %
+                               (node, msg))

-    if instance.name not in node_insts.data:
+    if instance.name not in node_insts.payload:
       raise errors.OpExecError("Instance %s is not running." % instance.name)

     logging.debug("Connecting to console of %s on %s", instance.name, node)
@@ -5206,7 +5256,10 @@ class LUReplaceDisks(LogicalUnit):
       raise errors.OpExecError("Can't list volume groups on the nodes")
     for node in oth_node, tgt_node:
       res = results[node]
-      if res.failed or not res.data or my_vg not in res.data:
+      msg = res.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Error checking node %s: %s" % (node, msg))
+      if my_vg not in res.payload:
         raise errors.OpExecError("Volume group '%s' not found on %s" %
                                  (my_vg, node))
     for idx, dev in enumerate(instance.disks):
@@ -5402,7 +5455,10 @@ class LUReplaceDisks(LogicalUnit):
     results = self.rpc.call_vg_list([pri_node, new_node])
     for node in pri_node, new_node:
       res = results[node]
-      if res.failed or not res.data or my_vg not in res.data:
+      msg = res.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Error checking node %s: %s" % (node, msg))
+      if my_vg not in res.payload:
         raise errors.OpExecError("Volume group '%s' not found on %s" %
                                  (my_vg, node))
     for idx, dev in enumerate(instance.disks):
@@ -5632,10 +5688,11 @@ class LUGrowDisk(LogicalUnit):
                                        instance.hypervisor)
     for node in nodenames:
       info = nodeinfo[node]
-      if info.failed or not info.data:
+      msg = info.RemoteFailMsg()
+      if msg:
         raise errors.OpPrereqError("Cannot get current information"
-                                   " from node '%s'" % node)
-      vg_free = info.data.get('vg_free', None)
+                                   " from node %s: %s" % (node, msg))
+      vg_free = info.payload.get('vg_free', None)
       if not isinstance(vg_free, int):
         raise errors.OpPrereqError("Can't compute free disk space on"
                                    " node %s" % node)
@@ -5782,8 +5839,11 @@ class LUQueryInstanceData(NoHooksLU):
       remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                 instance.name,
                                                 instance.hypervisor)
-      remote_info.Raise()
-      remote_info = remote_info.data
+      msg = remote_info.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Error checking node %s: %s" %
+                                 (instance.primary_node, msg))
+      remote_info = remote_info.payload
       if remote_info and "state" in remote_info:
         remote_state = "up"
       else:
@@ -5898,10 +5958,16 @@ class LUSetInstanceParams(LogicalUnit):
         if not utils.IsValidIP(nic_ip):
           raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)

+      nic_bridge = nic_dict.get('bridge', None)
+      nic_link = nic_dict.get('link', None)
+      if nic_bridge and nic_link:
+        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link' at the same time")
+      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
+        nic_dict['bridge'] = None
+      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
+        nic_dict['link'] = None
+
       if nic_op == constants.DDM_ADD:
-        nic_bridge = nic_dict.get('bridge', None)
-        if nic_bridge is None:
-          nic_dict['bridge'] = self.cfg.GetDefBridge()
         nic_mac = nic_dict.get('mac', None)
         if nic_mac is None:
           nic_dict['mac'] = constants.VALUE_AUTO
@@ -5944,6 +6010,7 @@ class LUSetInstanceParams(LogicalUnit):
     if self.op.nics:
       args['nics'] = []
       nic_override = dict(self.op.nics)
+      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
       for idx, nic in enumerate(self.instance.nics):
         if idx in nic_override:
           this_nic_override = nic_override[idx]
@@ -5953,20 +6020,24 @@ class LUSetInstanceParams(LogicalUnit):
           ip = this_nic_override['ip']
         else:
           ip = nic.ip
-        if 'bridge' in this_nic_override:
-          bridge = this_nic_override['bridge']
-        else:
-          bridge = nic.bridge
         if 'mac' in this_nic_override:
           mac = this_nic_override['mac']
         else:
           mac = nic.mac
-        args['nics'].append((ip, bridge, mac))
+        if idx in self.nic_pnew:
+          nicparams = self.nic_pnew[idx]
+        else:
+          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
+        mode = nicparams[constants.NIC_MODE]
+        link = nicparams[constants.NIC_LINK]
+        args['nics'].append((ip, mac, mode, link))
       if constants.DDM_ADD in nic_override:
         ip = nic_override[constants.DDM_ADD].get('ip', None)
-        bridge = nic_override[constants.DDM_ADD]['bridge']
         mac = nic_override[constants.DDM_ADD]['mac']
-        args['nics'].append((ip, bridge, mac))
+        nicparams = self.nic_pnew[constants.DDM_ADD]
+        mode = nicparams[constants.NIC_MODE]
+        link = nicparams[constants.NIC_LINK]
+        args['nics'].append((ip, mac, mode, link))
       elif constants.DDM_REMOVE in nic_override:
         del args['nics'][-1]

@@ -5974,6 +6045,38 @@ class LUSetInstanceParams(LogicalUnit):
     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl

+  def _GetUpdatedParams(self, old_params, update_dict,
+                        default_values, parameter_types):
+    """Return the new params dict for the given params.
+
+    @type old_params: dict
+    @param old_params: old parameters
+    @type update_dict: dict
+    @param update_dict: dict containing new parameter values, or
+        constants.VALUE_DEFAULT to reset the parameter to its default
+        value
+    @type default_values: dict
+    @param default_values: default values for the filled parameters
+    @type parameter_types: dict
+    @param parameter_types: dict mapping target dict keys to types
+        in constants.ENFORCEABLE_TYPES
+    @rtype: (dict, dict)
+    @return: (new_parameters, filled_parameters)
+
+    """
+    params_copy = copy.deepcopy(old_params)
+    for key, val in update_dict.iteritems():
+      if val == constants.VALUE_DEFAULT:
+        try:
+          del params_copy[key]
+        except KeyError:
+          pass
+      else:
+        params_copy[key] = val
+    utils.ForceDictType(params_copy, parameter_types)
+    params_filled = objects.FillDict(default_values, params_copy)
+    return (params_copy, params_filled)
+
   def CheckPrereq(self):
     """Check prerequisites.
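Illustration (not part of the patch): _GetUpdatedParams above is the factored-out update logic that the hvparams/beparams hunks below switch to. A hypothetical call, resetting one key to its cluster default while overriding another:

    # sketch only; the argument values are invented
    i_bedict, be_new = self._GetUpdatedParams(
      instance.beparams,                             # old per-instance dict
      {constants.BE_VCPUS: constants.VALUE_DEFAULT,  # fall back to default
       constants.BE_MEMORY: 512},                    # explicit override
      cluster.beparams[constants.PP_DEFAULT],
      constants.BES_PARAMETER_TYPES)
    # i_bedict: instance-level dict, BE_VCPUS removed, BE_MEMORY == 512
    # be_new:   filled dict, BE_VCPUS taken from the cluster default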
@@ -5993,18 +6096,10 @@ class LUSetInstanceParams(LogicalUnit):

     # hvparams processing
     if self.op.hvparams:
-      i_hvdict = copy.deepcopy(instance.hvparams)
-      for key, val in self.op.hvparams.iteritems():
-        if val == constants.VALUE_DEFAULT:
-          try:
-            del i_hvdict[key]
-          except KeyError:
-            pass
-        else:
-          i_hvdict[key] = val
-      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
-      hv_new = objects.FillDict(cluster.hvparams[instance.hypervisor],
-                                i_hvdict)
+      i_hvdict, hv_new = self._GetUpdatedParams(
+        instance.hvparams, self.op.hvparams,
+        cluster.hvparams[instance.hypervisor],
+        constants.HVS_PARAMETER_TYPES)
       # local check
       hypervisor.GetHypervisor(
         instance.hypervisor).CheckParameterSyntax(hv_new)
@@ -6016,18 +6111,10 @@ class LUSetInstanceParams(LogicalUnit):

     # beparams processing
     if self.op.beparams:
-      i_bedict = copy.deepcopy(instance.beparams)
-      for key, val in self.op.beparams.iteritems():
-        if val == constants.VALUE_DEFAULT:
-          try:
-            del i_bedict[key]
-          except KeyError:
-            pass
-        else:
-          i_bedict[key] = val
-      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
-      be_new = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
-                                i_bedict)
+      i_bedict, be_new = self._GetUpdatedParams(
+        instance.beparams, self.op.beparams,
+        cluster.beparams[constants.PP_DEFAULT],
+        constants.BES_PARAMETER_TYPES)
       self.be_new = be_new # the new actual values
       self.be_inst = i_bedict # the new dict (without defaults)
     else:
@@ -6044,35 +6131,51 @@ class LUSetInstanceParams(LogicalUnit):
                                                   instance.hypervisor)
       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                          instance.hypervisor)
-      if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
+      pninfo = nodeinfo[pnode]
+      msg = pninfo.RemoteFailMsg()
+      if msg:
         # Assume the primary node is unreachable and go ahead
-        self.warn.append("Can't get info from primary node %s" % pnode)
+        self.warn.append("Can't get info from primary node %s: %s" %
+                         (pnode, msg))
+      elif not isinstance(pninfo.payload.get('memory_free', None), int):
+        self.warn.append("Node data from primary node %s doesn't contain"
+                         " free memory information" % pnode)
+      elif instance_info.RemoteFailMsg():
+        self.warn.append("Can't get instance runtime information: %s" %
+                         instance_info.RemoteFailMsg())
       else:
-        if not instance_info.failed and instance_info.data:
-          current_mem = int(instance_info.data['memory'])
+        if instance_info.payload:
+          current_mem = int(instance_info.payload['memory'])
         else:
           # Assume instance not running
           # (there is a slight race condition here, but it's not very probable,
           # and we have no other way to check)
           current_mem = 0
         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
-                    nodeinfo[pnode].data['memory_free'])
+                    pninfo.payload['memory_free'])
         if miss_mem > 0:
           raise errors.OpPrereqError("This change will prevent the instance"
                                      " from starting, due to %d MB of memory"
                                      " missing on its primary node" % miss_mem)

       if be_new[constants.BE_AUTO_BALANCE]:
-        for node, nres in nodeinfo.iteritems():
+        for node, nres in nodeinfo.items():
           if node not in instance.secondary_nodes:
             continue
-          if nres.failed or not isinstance(nres.data, dict):
-            self.warn.append("Can't get info from secondary node %s" % node)
-          elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
+          msg = nres.RemoteFailMsg()
+          if msg:
+            self.warn.append("Can't get info from secondary node %s: %s" %
+                             (node, msg))
+          elif not isinstance(nres.payload.get('memory_free', None), int):
+            self.warn.append("Secondary node %s didn't return free"
+                             " memory information" % node)
+          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
             self.warn.append("Not enough memory to failover instance to"
                              " secondary node %s" % node)

     # NIC processing
+    self.nic_pnew = {}
+    self.nic_pinst = {}
     for nic_op, nic_dict in self.op.nics:
       if nic_op == constants.DDM_REMOVE:
         if not instance.nics:
@@ -6084,17 +6187,46 @@ class LUSetInstanceParams(LogicalUnit):
           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                      " are 0 to %d" %
                                      (nic_op, len(instance.nics)))
+        old_nic_params = instance.nics[nic_op].nicparams
+        old_nic_ip = instance.nics[nic_op].ip
+      else:
+        old_nic_params = {}
+        old_nic_ip = None
+
+      update_params_dict = dict([(key, nic_dict[key])
+                                 for key in constants.NICS_PARAMETERS
+                                 if key in nic_dict])
+
       if 'bridge' in nic_dict:
-        nic_bridge = nic_dict['bridge']
-        if nic_bridge is None:
-          raise errors.OpPrereqError('Cannot set the nic bridge to None')
-        if not self.rpc.call_bridges_exist(pnode, [nic_bridge]):
-          msg = ("Bridge '%s' doesn't exist on one of"
-                 " the instance nodes" % nic_bridge)
+        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
+
+      new_nic_params, new_filled_nic_params = \
+        self._GetUpdatedParams(old_nic_params, update_params_dict,
+                               cluster.nicparams[constants.PP_DEFAULT],
+                               constants.NICS_PARAMETER_TYPES)
+      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
+      self.nic_pinst[nic_op] = new_nic_params
+      self.nic_pnew[nic_op] = new_filled_nic_params
+      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
+
+      if new_nic_mode == constants.NIC_MODE_BRIDGED:
+        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
+        result = self.rpc.call_bridges_exist(pnode, [nic_bridge])
+        msg = result.RemoteFailMsg()
+        if msg:
+          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
           if self.force:
             self.warn.append(msg)
           else:
             raise errors.OpPrereqError(msg)
+
+      if new_nic_mode == constants.NIC_MODE_ROUTED:
+        if 'ip' in nic_dict:
+          nic_ip = nic_dict['ip']
+        else:
+          nic_ip = old_nic_ip
+        if nic_ip is None:
+          raise errors.OpPrereqError('Cannot set the nic ip to None'
+                                     ' on a routed nic')
+
       if 'mac' in nic_dict:
         nic_mac = nic_dict['mac']
         if nic_mac is None:
@@ -6119,9 +6251,11 @@ class LUSetInstanceParams(LogicalUnit):
                                    " an instance")
       ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
       ins_l = ins_l[pnode]
-      if ins_l.failed or not isinstance(ins_l.data, list):
-        raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
-      if instance.name in ins_l.data:
+      msg = ins_l.RemoteFailMsg()
+      if msg:
+        raise errors.OpPrereqError("Can't contact node %s: %s" %
+                                   (pnode, msg))
+      if instance.name in ins_l.payload:
         raise errors.OpPrereqError("Instance is running, can't remove"
                                    " disks.")
@@ -6151,6 +6285,7 @@ class LUSetInstanceParams(LogicalUnit):
     result = []
     instance = self.instance
+    cluster = self.cluster
     # disk changes
     for disk_op, disk_dict in self.op.disks:
       if disk_op == constants.DDM_REMOVE:
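Illustration (not part of the patch): the free-memory check in the LUSetInstanceParams memory hunk above only charges the node for memory the instance does not already use. With invented numbers, growing an instance from 512 MB to 2048 MB on a node reporting 1024 MB free:

    # sketch only; all figures invented
    current_mem = 512    # instance_info.payload['memory']
    memory_free = 1024   # pninfo.payload['memory_free']
    miss_mem = 2048 - current_mem - memory_free   # == 512
    # miss_mem > 0, so the OpPrereqError above is raised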
@@ -6211,19 +6346,24 @@ class LUSetInstanceParams(LogicalUnit):
       elif nic_op == constants.DDM_ADD:
         # mac and bridge should be set, by now
         mac = nic_dict['mac']
-        bridge = nic_dict['bridge']
-        new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
-                              bridge=bridge)
+        ip = nic_dict.get('ip', None)
+        nicparams = self.nic_pinst[constants.DDM_ADD]
+        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
         instance.nics.append(new_nic)
         result.append(("nic.%d" % (len(instance.nics) - 1),
-                       "add:mac=%s,ip=%s,bridge=%s" %
-                       (new_nic.mac, new_nic.ip, new_nic.bridge)))
+                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
+                       (new_nic.mac, new_nic.ip,
+                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
+                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
+                       )))
       else:
-        # change a given nic
-        for key in 'mac', 'ip', 'bridge':
+        for key in 'mac', 'ip':
           if key in nic_dict:
             setattr(instance.nics[nic_op], key, nic_dict[key])
-            result.append(("nic.%s/%d" % (key, nic_op), nic_dict[key]))
+        if nic_op in self.nic_pnew:
+          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
+        for key, val in nic_dict.iteritems():
+          result.append(("nic.%s/%d" % (key, nic_op), val))

     # hvparams changes
     if self.op.hvparams:
@@ -6276,10 +6416,10 @@ class LUQueryExports(NoHooksLU):
     rpcresult = self.rpc.call_export_list(self.nodes)
     result = {}
     for node in rpcresult:
-      if rpcresult[node].failed:
+      if rpcresult[node].RemoteFailMsg():
         result[node] = False
       else:
-        result[node] = rpcresult[node].data
+        result[node] = rpcresult[node].payload

     return result

@@ -6378,16 +6518,17 @@ class LUExportInstance(LogicalUnit):
     try:
       for disk in instance.disks:
-        # new_dev_name will be a snapshot of an lvm leaf of the one we passed
-        new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
-        if new_dev_name.failed or not new_dev_name.data:
-          self.LogWarning("Could not snapshot block device %s on node %s",
-                          disk.logical_id[1], src_node)
+        # result.payload will be a snapshot of an lvm leaf of the one we passed
+        result = self.rpc.call_blockdev_snapshot(src_node, disk)
+        msg = result.RemoteFailMsg()
+        if msg:
+          self.LogWarning("Could not snapshot block device %s on node %s: %s",
+                          disk.logical_id[1], src_node, msg)
           snap_disks.append(False)
         else:
+          disk_id = (vgname, result.payload)
           new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
-                                 logical_id=(vgname, new_dev_name.data),
-                                 physical_id=(vgname, new_dev_name.data),
+                                 logical_id=disk_id, physical_id=disk_id,
                                  iv_name=disk.iv_name)
           snap_disks.append(new_dev)
@@ -6406,19 +6547,21 @@ class LUExportInstance(LogicalUnit):
         if dev:
           result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                  instance, cluster_name, idx)
-          if result.failed or not result.data:
+          msg = result.RemoteFailMsg()
+          if msg:
             self.LogWarning("Could not export block device %s from node %s to"
-                            " node %s", dev.logical_id[1], src_node,
-                            dst_node.name)
+                            " node %s: %s", dev.logical_id[1], src_node,
+                            dst_node.name, msg)
           msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
           if msg:
             self.LogWarning("Could not remove snapshot block device %s from"
                             " node %s: %s", dev.logical_id[1], src_node, msg)

     result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
-    if result.failed or not result.data:
-      self.LogWarning("Could not finalize export for instance %s on node %s",
-                      instance.name, dst_node.name)
+    msg = result.RemoteFailMsg()
+    if msg:
+      self.LogWarning("Could not finalize export for instance %s"
+                      " on node %s: %s", instance.name, dst_node.name, msg)

     nodelist = self.cfg.GetNodeList()
     nodelist.remove(dst_node.name)

     # on one-node clusters nodelist will be empty after the removal
     # if we proceed the backup would be removed because OpQueryExports
     # substitutes an empty list with the full cluster node list.
+    iname = instance.name
     if nodelist:
       exportlist = self.rpc.call_export_list(nodelist)
       for node in exportlist:
-        if exportlist[node].failed:
+        if exportlist[node].RemoteFailMsg():
           continue
-        if instance.name in exportlist[node].data:
-          if not self.rpc.call_export_remove(node, instance.name):
+        if iname in exportlist[node].payload:
+          msg = self.rpc.call_export_remove(node, iname).RemoteFailMsg()
+          if msg:
             self.LogWarning("Could not remove older export for instance %s"
-                            " on node %s", instance.name, node)
+                            " on node %s: %s", iname, node, msg)


 class LURemoveExport(NoHooksLU):
@@ -6468,19 +6613,21 @@ class LURemoveExport(NoHooksLU):
       fqdn_warn = True
       instance_name = self.op.instance_name

-    exportlist = self.rpc.call_export_list(self.acquired_locks[
-      locking.LEVEL_NODE])
+    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
+    exportlist = self.rpc.call_export_list(locked_nodes)
     found = False
     for node in exportlist:
-      if exportlist[node].failed:
-        self.LogWarning("Failed to query node %s, continuing" % node)
+      msg = exportlist[node].RemoteFailMsg()
+      if msg:
+        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
         continue
-      if instance_name in exportlist[node].data:
+      if instance_name in exportlist[node].payload:
         found = True
         result = self.rpc.call_export_remove(node, instance_name)
-        if result.failed or not result.data:
+        msg = result.RemoteFailMsg()
+        if msg:
           logging.error("Could not remove export for instance %s"
-                        " on node %s", instance_name, node)
+                        " on node %s: %s", instance_name, node, msg)

     if fqdn_warn and not found:
       feedback_fn("Export not found. If trying to remove an export belonging"
@@ -6800,29 +6947,33 @@ class IAllocator(object):
         }

       if not ninfo.offline:
-        nresult.Raise()
-        if not isinstance(nresult.data, dict):
-          raise errors.OpExecError("Can't get data for node %s" % nname)
-        remote_info = nresult.data
+        msg = nresult.RemoteFailMsg()
+        if msg:
+          raise errors.OpExecError("Can't get data for node %s: %s" %
+                                   (nname, msg))
+        msg = node_iinfo[nname].RemoteFailMsg()
+        if msg:
+          raise errors.OpExecError("Can't get node instance info"
+                                   " from node %s: %s" % (nname, msg))
+        remote_info = nresult.payload
         for attr in ['memory_total', 'memory_free', 'memory_dom0',
                      'vg_size', 'vg_free', 'cpu_total']:
           if attr not in remote_info:
             raise errors.OpExecError("Node '%s' didn't return attribute"
                                      " '%s'" % (nname, attr))
-          try:
-            remote_info[attr] = int(remote_info[attr])
-          except ValueError, err:
+          if not isinstance(remote_info[attr], int):
             raise errors.OpExecError("Node '%s' returned invalid value"
-                                     " for '%s': %s" % (nname, attr, err))
+                                     " for '%s': %s" %
+                                     (nname, attr, remote_info[attr]))
         # compute memory used by primary instances
         i_p_mem = i_p_up_mem = 0
         for iinfo, beinfo in i_list:
           if iinfo.primary_node == nname:
             i_p_mem += beinfo[constants.BE_MEMORY]
-            if iinfo.name not in node_iinfo[nname].data:
+            if iinfo.name not in node_iinfo[nname].payload:
               i_used_mem = 0
             else:
-              i_used_mem = int(node_iinfo[nname].data[iinfo.name]['memory'])
+              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
             remote_info['memory_free'] -= max(0, i_mem_diff)
@@ -6848,8 +6999,19 @@ class IAllocator(object):
     # instance data
     instance_data = {}
     for iinfo, beinfo in i_list:
-      nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
-                  for n in iinfo.nics]
+      nic_data = []
+      for nic in iinfo.nics:
+        filled_params = objects.FillDict(
+          cluster_info.nicparams[constants.PP_DEFAULT],
+          nic.nicparams)
+        nic_dict = {"mac": nic.mac,
+                    "ip": nic.ip,
+                    "mode": filled_params[constants.NIC_MODE],
+                    "link": filled_params[constants.NIC_LINK],
+                   }
+        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
+          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
+        nic_data.append(nic_dict)
       pir = {
         "tags": list(iinfo.GetTags()),
         "admin_up": iinfo.admin_up,
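Illustration (not part of the patch): the allocator input above now describes each NIC by mode and link, keeping "bridge" as a compatibility key in bridged mode only. A filled entry would look like (values invented):

    # sketch only
    nic_dict = {"mac": "aa:00:00:fa:39:71",
                "ip": None,
                "mode": "bridged",    # constants.NIC_MODE_BRIDGED
                "link": "xen-br0",
                "bridge": "xen-br0"}  # present only because mode is bridged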