X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/8659b73ede8c50addb8c665911ffe049459c8665..b9bddb6bdf7ac5841c732845ce0122c64bb026b1:/lib/cmdlib.py diff --git a/lib/cmdlib.py b/lib/cmdlib.py index ca7ff68..f947f5e 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -30,6 +30,7 @@ import time import tempfile import re import platform +import logging from ganeti import rpc from ganeti import ssh @@ -38,11 +39,9 @@ from ganeti import utils from ganeti import errors from ganeti import hypervisor from ganeti import locking -from ganeti import config from ganeti import constants from ganeti import objects from ganeti import opcodes -from ganeti import ssconf from ganeti import serializer @@ -57,7 +56,6 @@ class LogicalUnit(object): - redefine HPATH and HTYPE - optionally redefine their run requirements: REQ_MASTER: the LU needs to run on the master node - REQ_WSSTORE: the LU needs a writable SimpleStore REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively Note that all commands require root permissions. @@ -67,10 +65,9 @@ class LogicalUnit(object): HTYPE = None _OP_REQP = [] REQ_MASTER = True - REQ_WSSTORE = False REQ_BGL = True - def __init__(self, processor, op, context, sstore): + def __init__(self, processor, op, context): """Constructor for LogicalUnit. This needs to be overriden in derived classes in order to check op @@ -80,9 +77,15 @@ class LogicalUnit(object): self.proc = processor self.op = op self.cfg = context.cfg - self.sstore = sstore self.context = context + # Dicts used to declare locking needs to mcpu self.needed_locks = None + self.acquired_locks = {} + self.share_locks = dict(((i, 0) for i in locking.LEVELS)) + self.add_locks = {} + self.remove_locks = {} + # Used to force good behavior when calling helper functions + self.recalculate_locks = {} self.__ssh = None for attr_name in self._OP_REQP: @@ -95,7 +98,7 @@ class LogicalUnit(object): raise errors.OpPrereqError("Cluster not initialized yet," " use 'gnt-cluster init' first.") if self.REQ_MASTER: - master = sstore.GetMasterNode() + master = self.cfg.GetMasterNode() if master != utils.HostInfo().name: raise errors.OpPrereqError("Commands must be run on the master" " node %s" % master) @@ -105,7 +108,7 @@ class LogicalUnit(object): """ if not self.__ssh: - self.__ssh = ssh.SshRunner(self.sstore) + self.__ssh = ssh.SshRunner(self.cfg.GetClusterName()) return self.__ssh ssh = property(fget=__GetSSH) @@ -124,15 +127,17 @@ class LogicalUnit(object): - Use an empty dict if you don't need any lock - If you don't need any lock at a particular level omit that level - Don't put anything for the BGL level - - If you want all locks at a level use None as a value - (this reflects what LockSet does, and will be replaced before - CheckPrereq with the full list of nodes that have been locked) + - If you want all locks at a level use locking.ALL_SET as a value + + If you need to share locks (rather than acquire them exclusively) at one + level you can modify self.share_locks, setting a true value (usually 1) for + that level. By default locks are not shared. 
Examples: # Acquire all nodes and one instance self.needed_locks = { - locking.LEVEL_NODE: None, - locking.LEVEL_INSTANCES: ['instance1.example.tld'], + locking.LEVEL_NODE: locking.ALL_SET, + locking.LEVEL_INSTANCE: ['instance1.example.tld'], } # Acquire just two nodes self.needed_locks = { @@ -150,6 +155,24 @@ class LogicalUnit(object): else: raise NotImplementedError + def DeclareLocks(self, level): + """Declare LU locking needs for a level + + While most LUs can just declare their locking needs at ExpandNames time, + sometimes there's the need to calculate some locks after having acquired + the ones before. This function is called just before acquiring locks at a + particular level, but after acquiring the ones at lower levels, and permits + such calculations. It can be used to modify self.needed_locks, and by + default it does nothing. + + This function is only called if you have something already set in + self.needed_locks for the level. + + @param level: Locking level which is going to be locked + @type level: member of ganeti.locking.LEVELS + + """ + def CheckPrereq(self): """Check prerequisites for this LU. @@ -239,6 +262,51 @@ class LogicalUnit(object): self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name self.op.instance_name = expanded_name + def _LockInstancesNodes(self, primary_only=False): + """Helper function to declare instances' nodes for locking. + + This function should be called after locking one or more instances to lock + their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE] + with all primary or secondary nodes for instances already locked and + present in self.needed_locks[locking.LEVEL_INSTANCE]. + + It should be called from DeclareLocks, and for safety only works if + self.recalculate_locks[locking.LEVEL_NODE] is set. + + In the future it may grow parameters to just lock some instance's nodes, or + to just lock primaries or secondary nodes, if needed. + + If should be called in DeclareLocks in a way similar to: + + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() + + @type primary_only: boolean + @param primary_only: only lock primary nodes of locked instances + + """ + assert locking.LEVEL_NODE in self.recalculate_locks, \ + "_LockInstancesNodes helper function called with no nodes to recalculate" + + # TODO: check if we're really been called with the instance locks held + + # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the + # future we might want to have different behaviors depending on the value + # of self.recalculate_locks[locking.LEVEL_NODE] + wanted_nodes = [] + for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]: + instance = self.context.cfg.GetInstanceInfo(instance_name) + wanted_nodes.append(instance.primary_node) + if not primary_only: + wanted_nodes.extend(instance.secondary_nodes) + + if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE: + self.needed_locks[locking.LEVEL_NODE] = wanted_nodes + elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND: + self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes) + + del self.recalculate_locks[locking.LEVEL_NODE] + class NoHooksLU(LogicalUnit): """Simple LU which runs no hooks. 
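The ExpandNames/DeclareLocks/_LockInstancesNodes machinery added above is the pattern the rest of this patch converts individual LUs to. The sketch below is illustrative only and is not part of the patch: LUExampleOp is a hypothetical name, and the body assumes cmdlib.py's module namespace (locking, constants, NoHooksLU), but the same shape appears verbatim in the converted LUs later in this diff.

class LUExampleOp(NoHooksLU):
  """Hypothetical LU showing the new lock-declaration protocol."""
  _OP_REQP = ["instance_name"]
  REQ_BGL = False   # opt out of the Big Ganeti Lock

  def ExpandNames(self):
    # Lock the instance now; its nodes are not known yet, so leave an empty
    # list and ask mcpu to recompute this level via DeclareLocks.
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # Called with the instance lock already held, so the instance's node
    # list can be read safely and turned into node locks.
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()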
@@ -261,17 +329,17 @@ def _GetWantedNodes(lu, nodes): if not isinstance(nodes, list): raise errors.OpPrereqError("Invalid argument type 'nodes'") - if nodes: - wanted = [] + if not nodes: + raise errors.ProgrammerError("_GetWantedNodes should only be called with a" + " non-empty list of nodes whose name is to be expanded.") - for name in nodes: - node = lu.cfg.ExpandNodeName(name) - if node is None: - raise errors.OpPrereqError("No such node name '%s'" % name) - wanted.append(node) + wanted = [] + for name in nodes: + node = lu.cfg.ExpandNodeName(name) + if node is None: + raise errors.OpPrereqError("No such node name '%s'" % name) + wanted.append(node) - else: - wanted = lu.cfg.GetNodeList() return utils.NiceSort(wanted) @@ -374,7 +442,7 @@ def _BuildInstanceHookEnvByObject(instance, override=None): return _BuildInstanceHookEnv(**args) -def _CheckInstanceBridgesExist(instance): +def _CheckInstanceBridgesExist(lu, instance): """Check that the brigdes needed by an instance exist. """ @@ -400,7 +468,7 @@ class LUDestroyCluster(NoHooksLU): Any errors are signalled by raising errors.OpPrereqError. """ - master = self.sstore.GetMasterNode() + master = self.cfg.GetMasterNode() nodelist = self.cfg.GetNodeList() if len(nodelist) != 1 or nodelist[0] != master: @@ -415,13 +483,13 @@ class LUDestroyCluster(NoHooksLU): """Destroys the cluster. """ - master = self.sstore.GetMasterNode() - if not rpc.call_node_stop_master(master): + master = self.cfg.GetMasterNode() + if not rpc.call_node_stop_master(master, False): raise errors.OpExecError("Could not disable the master role") priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS) utils.CreateBackup(priv_key) utils.CreateBackup(pub_key) - rpc.call_node_leave_cluster(master) + return master class LUVerifyCluster(LogicalUnit): @@ -431,6 +499,14 @@ class LUVerifyCluster(LogicalUnit): HPATH = "cluster-verify" HTYPE = constants.HTYPE_CLUSTER _OP_REQP = ["skip_checks"] + REQ_BGL = False + + def ExpandNames(self): + self.needed_locks = { + locking.LEVEL_NODE: locking.ALL_SET, + locking.LEVEL_INSTANCE: locking.ALL_SET, + } + self.share_locks = dict(((i, 1) for i in locking.LEVELS)) def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result, remote_version, feedback_fn): @@ -510,8 +586,11 @@ class LUVerifyCluster(LogicalUnit): (node, node_result['node-net-test'][node])) hyp_result = node_result.get('hypervisor', None) - if hyp_result is not None: - feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result) + if isinstance(hyp_result, dict): + for hv_name, hv_result in hyp_result.iteritems(): + if hv_result is not None: + feedback_fn(" - ERROR: hypervisor %s verify failure: '%s'" % + (hv_name, hv_result)) return bad def _VerifyInstance(self, instance, instanceconfig, node_vol_is, @@ -645,6 +724,7 @@ class LUVerifyCluster(LogicalUnit): feedback_fn(" - ERROR: %s" % msg) vg_name = self.cfg.GetVGName() + hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors nodelist = utils.NiceSort(self.cfg.GetNodeList()) nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist] instancelist = utils.NiceSort(self.cfg.GetInstanceList()) @@ -656,25 +736,27 @@ class LUVerifyCluster(LogicalUnit): # FIXME: verify OS list # do local checksums - file_names = list(self.sstore.GetFileList()) + file_names = [] file_names.append(constants.SSL_CERT_FILE) file_names.append(constants.CLUSTER_CONF_FILE) local_checksums = utils.FingerprintFiles(file_names) feedback_fn("* Gathering data (%d nodes)" % len(nodelist)) all_volumeinfo = 
rpc.call_volume_list(nodelist, vg_name) - all_instanceinfo = rpc.call_instance_list(nodelist) + all_instanceinfo = rpc.call_instance_list(nodelist, hypervisors) all_vglist = rpc.call_vg_list(nodelist) node_verify_param = { 'filelist': file_names, 'nodelist': nodelist, - 'hypervisor': None, + 'hypervisor': hypervisors, 'node-net-test': [(node.name, node.primary_ip, node.secondary_ip) for node in nodeinfo] } - all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param) + all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param, + self.cfg.GetClusterName()) all_rversion = rpc.call_version(nodelist) - all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName()) + all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName(), + self.cfg.GetHypervisorType()) for node in nodelist: feedback_fn("* Verifying node %s" % node) @@ -795,7 +877,7 @@ class LUVerifyCluster(LogicalUnit): feedback_fn(" - NOTICE: %d non-redundant instance(s) found." % len(i_non_redundant)) - return int(bad) + return not bad def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result): """Analize the post-hooks' result, handle it, and send some @@ -808,7 +890,8 @@ class LUVerifyCluster(LogicalUnit): lu_result: previous Exec result """ - # We only really run POST phase hooks, and are only interested in their results + # We only really run POST phase hooks, and are only interested in + # their results if phase == constants.HOOKS_PHASE_POST: # Used to change hooks' output to proper indentation indent_re = re.compile('^', re.M) @@ -844,6 +927,14 @@ class LUVerifyDisks(NoHooksLU): """ _OP_REQP = [] + REQ_BGL = False + + def ExpandNames(self): + self.needed_locks = { + locking.LEVEL_NODE: locking.ALL_SET, + locking.LEVEL_INSTANCE: locking.ALL_SET, + } + self.share_locks = dict(((i, 1) for i in locking.LEVELS)) def CheckPrereq(self): """Check prerequisites. @@ -918,17 +1009,16 @@ class LURenameCluster(LogicalUnit): HPATH = "cluster-rename" HTYPE = constants.HTYPE_CLUSTER _OP_REQP = ["name"] - REQ_WSSTORE = True def BuildHooksEnv(self): """Build hooks env. 
""" env = { - "OP_TARGET": self.sstore.GetClusterName(), + "OP_TARGET": self.cfg.GetClusterName(), "NEW_NAME": self.op.name, } - mn = self.sstore.GetMasterNode() + mn = self.cfg.GetMasterNode() return env, [mn], [mn] def CheckPrereq(self): @@ -939,8 +1029,8 @@ class LURenameCluster(LogicalUnit): new_name = hostname.name self.ip = new_ip = hostname.ip - old_name = self.sstore.GetClusterName() - old_ip = self.sstore.GetMasterIP() + old_name = self.cfg.GetClusterName() + old_ip = self.cfg.GetMasterIP() if new_name == old_name and new_ip == old_ip: raise errors.OpPrereqError("Neither the name nor the IP address of the" " cluster has changed") @@ -958,15 +1048,15 @@ class LURenameCluster(LogicalUnit): """ clustername = self.op.name ip = self.ip - ss = self.sstore # shutdown the master IP - master = ss.GetMasterNode() - if not rpc.call_node_stop_master(master): + master = self.cfg.GetMasterNode() + if not rpc.call_node_stop_master(master, False): raise errors.OpExecError("Could not disable the master role") try: # modify the sstore + # TODO: sstore ss.SetKey(ss.SS_MASTER_IP, ip) ss.SetKey(ss.SS_CLUSTER_NAME, clustername) @@ -985,7 +1075,7 @@ class LURenameCluster(LogicalUnit): logger.Error("copy of file %s to node %s failed" % (fname, to_node)) finally: - if not rpc.call_node_start_master(master): + if not rpc.call_node_start_master(master, False): logger.Error("Could not re-enable the master role on the master," " please restart manually.") @@ -1014,16 +1104,25 @@ class LUSetClusterParams(LogicalUnit): HPATH = "cluster-modify" HTYPE = constants.HTYPE_CLUSTER _OP_REQP = [] + REQ_BGL = False + + def ExpandNames(self): + # FIXME: in the future maybe other cluster params won't require checking on + # all nodes to be modified. + self.needed_locks = { + locking.LEVEL_NODE: locking.ALL_SET, + } + self.share_locks[locking.LEVEL_NODE] = 1 def BuildHooksEnv(self): """Build hooks env. """ env = { - "OP_TARGET": self.sstore.GetClusterName(), + "OP_TARGET": self.cfg.GetClusterName(), "NEW_VG_NAME": self.op.vg_name, } - mn = self.sstore.GetMasterNode() + mn = self.cfg.GetMasterNode() return env, [mn], [mn] def CheckPrereq(self): @@ -1033,9 +1132,10 @@ class LUSetClusterParams(LogicalUnit): if the given volume group is valid. """ + # FIXME: This only works because there is only one parameter that can be + # changed or removed. if not self.op.vg_name: - instances = [self.cfg.GetInstanceInfo(name) - for name in self.cfg.GetInstanceList()] + instances = self.cfg.GetAllInstancesInfo().values() for inst in instances: for disk in inst.disks: if _RecursiveCheckIfLVMBased(disk): @@ -1044,7 +1144,7 @@ class LUSetClusterParams(LogicalUnit): # if vg_name not None, checks given volume group on all nodes if self.op.vg_name: - node_list = self.cfg.GetNodeList() + node_list = self.acquired_locks[locking.LEVEL_NODE] vglist = rpc.call_vg_list(node_list) for node in node_list: vgstatus = utils.CheckVolumeGroupSize(vglist[node], self.op.vg_name, @@ -1064,7 +1164,7 @@ class LUSetClusterParams(LogicalUnit): " state, not changing") -def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False): +def _WaitForSync(lu, instance, oneshot=False, unlock=False): """Sleep and poll for an instance's disk to sync. """ @@ -1072,12 +1172,12 @@ def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False): return True if not oneshot: - proc.LogInfo("Waiting for instance %s to sync disks." % instance.name) + lu.proc.LogInfo("Waiting for instance %s to sync disks." 
% instance.name) node = instance.primary_node for dev in instance.disks: - cfgw.SetDiskID(dev, node) + lu.cfg.SetDiskID(dev, node) retries = 0 while True: @@ -1086,7 +1186,7 @@ def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False): cumul_degraded = False rstats = rpc.call_blockdev_getmirrorstatus(node, instance.disks) if not rstats: - proc.LogWarning("Can't get any data from node %s" % node) + lu.proc.LogWarning("Can't get any data from node %s" % node) retries += 1 if retries >= 10: raise errors.RemoteError("Can't contact node %s for mirror data," @@ -1097,8 +1197,8 @@ def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False): for i in range(len(rstats)): mstat = rstats[i] if mstat is None: - proc.LogWarning("Can't compute data for node %s/%s" % - (node, instance.disks[i].iv_name)) + lu.proc.LogWarning("Can't compute data for node %s/%s" % + (node, instance.disks[i].iv_name)) continue # we ignore the ldisk parameter perc_done, est_time, is_degraded, _ = mstat @@ -1110,19 +1210,19 @@ def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False): max_time = est_time else: rem_time = "no time estimate" - proc.LogInfo("- device %s: %5.2f%% done, %s" % - (instance.disks[i].iv_name, perc_done, rem_time)) + lu.proc.LogInfo("- device %s: %5.2f%% done, %s" % + (instance.disks[i].iv_name, perc_done, rem_time)) if done or oneshot: break time.sleep(min(60, max_time)) if done: - proc.LogInfo("Instance %s's disks are in sync." % instance.name) + lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name) return not cumul_degraded -def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False): +def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False): """Check that mirrors are not degraded. The ldisk parameter, if True, will change the test from the @@ -1130,7 +1230,7 @@ def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False): the device(s)) to the ldisk (representing the local storage status). """ - cfgw.SetDiskID(dev, node) + lu.cfg.SetDiskID(dev, node) if ldisk: idx = 6 else: @@ -1146,7 +1246,7 @@ def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False): result = result and (not rstats[idx]) if dev.children: for child in dev.children: - result = result and _CheckDiskConsistency(cfgw, child, node, on_primary) + result = result and _CheckDiskConsistency(lu, child, node, on_primary) return result @@ -1156,13 +1256,9 @@ class LUDiagnoseOS(NoHooksLU): """ _OP_REQP = ["output_fields", "names"] + REQ_BGL = False - def CheckPrereq(self): - """Check prerequisites. - - This always succeeds, since this is a pure query LU. - - """ + def ExpandNames(self): if self.op.names: raise errors.OpPrereqError("Selective OS query not supported") @@ -1171,6 +1267,16 @@ class LUDiagnoseOS(NoHooksLU): dynamic=self.dynamic_fields, selected=self.op.output_fields) + # Lock all nodes, in shared mode + self.needed_locks = {} + self.share_locks[locking.LEVEL_NODE] = 1 + self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET + + def CheckPrereq(self): + """Check prerequisites. + + """ + @staticmethod def _DiagnoseByOS(node_list, rlist): """Remaps a per-node return list into an a per-os per-node dictionary @@ -1206,7 +1312,7 @@ class LUDiagnoseOS(NoHooksLU): """Compute the list of OSes. 
""" - node_list = self.cfg.GetNodeList() + node_list = self.acquired_locks[locking.LEVEL_NODE] node_data = rpc.call_os_diagnose(node_list) if node_data == False: raise errors.OpExecError("Can't gather the list of OSes") @@ -1271,7 +1377,7 @@ class LURemoveNode(LogicalUnit): instance_list = self.cfg.GetInstanceList() - masternode = self.sstore.GetMasterNode() + masternode = self.cfg.GetMasterNode() if node.name == masternode: raise errors.OpPrereqError("Node is the master node," " you need to failover first.") @@ -1295,15 +1401,9 @@ class LURemoveNode(LogicalUnit): logger.Info("stopping the node daemon and removing configs from node %s" % node.name) - rpc.call_node_leave_cluster(node.name) - - logger.Info("Removing node %s from config" % node.name) + self.context.RemoveNode(node.name) - self.cfg.RemoveNode(node.name) - # Remove the node from the Ganeti Lock Manager - self.context.glm.remove(locking.LEVEL_NODE, node.name) - - utils.RemoveHostFromEtcHosts(node.name) + rpc.call_node_leave_cluster(node.name) class LUQueryNodes(NoHooksLU): @@ -1311,13 +1411,9 @@ class LUQueryNodes(NoHooksLU): """ _OP_REQP = ["output_fields", "names"] + REQ_BGL = False - def CheckPrereq(self): - """Check prerequisites. - - This checks that the fields required are valid output fields. - - """ + def ExpandNames(self): self.dynamic_fields = frozenset([ "dtotal", "dfree", "mtotal", "mnode", "mfree", @@ -1325,26 +1421,62 @@ class LUQueryNodes(NoHooksLU): "ctotal", ]) - _CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt", - "pinst_list", "sinst_list", - "pip", "sip", "tags"], + self.static_fields = frozenset([ + "name", "pinst_cnt", "sinst_cnt", + "pinst_list", "sinst_list", + "pip", "sip", "tags", + "serial_no", + ]) + + _CheckOutputFields(static=self.static_fields, dynamic=self.dynamic_fields, selected=self.op.output_fields) - self.wanted = _GetWantedNodes(self, self.op.names) + self.needed_locks = {} + self.share_locks[locking.LEVEL_NODE] = 1 + + if self.op.names: + self.wanted = _GetWantedNodes(self, self.op.names) + else: + self.wanted = locking.ALL_SET + + self.do_locking = not self.static_fields.issuperset(self.op.output_fields) + if self.do_locking: + # if we don't request only static fields, we need to lock the nodes + self.needed_locks[locking.LEVEL_NODE] = self.wanted + + + def CheckPrereq(self): + """Check prerequisites. + + """ + # The validation of the node list is done in the _GetWantedNodes, + # if non empty, and if empty, there's no validation to do + pass def Exec(self, feedback_fn): """Computes the list of nodes and their attributes. 
""" - nodenames = self.wanted - nodelist = [self.cfg.GetNodeInfo(name) for name in nodenames] + all_info = self.cfg.GetAllNodesInfo() + if self.do_locking: + nodenames = self.acquired_locks[locking.LEVEL_NODE] + elif self.wanted != locking.ALL_SET: + nodenames = self.wanted + missing = set(nodenames).difference(all_info.keys()) + if missing: + raise errors.OpExecError( + "Some nodes were removed before retrieving their data: %s" % missing) + else: + nodenames = all_info.keys() + nodelist = [all_info[name] for name in nodenames] # begin data gathering if self.dynamic_fields.intersection(self.op.output_fields): live_data = {} - node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName()) + node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName(), + self.cfg.GetHypervisorType()) for name in nodenames: nodeinfo = node_data.get(name, None) if nodeinfo: @@ -1400,6 +1532,8 @@ class LUQueryNodes(NoHooksLU): val = node.secondary_ip elif field == "tags": val = list(node.GetTags()) + elif field == "serial_no": + val = node.serial_no elif field in self.dynamic_fields: val = live_data[node.name].get(field, None) else: @@ -1415,6 +1549,20 @@ class LUQueryNodeVolumes(NoHooksLU): """ _OP_REQP = ["nodes", "output_fields"] + REQ_BGL = False + + def ExpandNames(self): + _CheckOutputFields(static=["node"], + dynamic=["phys", "vg", "name", "size", "instance"], + selected=self.op.output_fields) + + self.needed_locks = {} + self.share_locks[locking.LEVEL_NODE] = 1 + if not self.op.nodes: + self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET + else: + self.needed_locks[locking.LEVEL_NODE] = \ + _GetWantedNodes(self, self.op.nodes) def CheckPrereq(self): """Check prerequisites. @@ -1422,12 +1570,7 @@ class LUQueryNodeVolumes(NoHooksLU): This checks that the fields required are valid output fields. """ - self.nodes = _GetWantedNodes(self, self.op.nodes) - - _CheckOutputFields(static=["node"], - dynamic=["phys", "vg", "name", "size", "instance"], - selected=self.op.output_fields) - + self.nodes = self.acquired_locks[locking.LEVEL_NODE] def Exec(self, feedback_fn): """Computes the list of nodes and their attributes. @@ -1555,7 +1698,7 @@ class LUAddNode(LogicalUnit): # check that the type of the node (single versus dual homed) is the # same as for the master - myself = cfg.GetNodeInfo(self.sstore.GetMasterNode()) + myself = cfg.GetNodeInfo(self.cfg.GetMasterNode()) master_singlehomed = myself.secondary_ip == myself.primary_ip newbie_singlehomed = secondary_ip == primary_ip if master_singlehomed != newbie_singlehomed: @@ -1635,13 +1778,14 @@ class LUAddNode(LogicalUnit): " you gave (%s). Please fix and re-run this" " command." % new_node.secondary_ip) - node_verify_list = [self.sstore.GetMasterNode()] + node_verify_list = [self.cfg.GetMasterNode()] node_verify_param = { 'nodelist': [node], # TODO: do a node-net-test as well? 
} - result = rpc.call_node_verify(node_verify_list, node_verify_param) + result = rpc.call_node_verify(node_verify_list, node_verify_param, + self.cfg.GetClusterName()) for verifier in node_verify_list: if not result[verifier]: raise errors.OpExecError("Cannot communicate with %s's node daemon" @@ -1654,7 +1798,7 @@ class LUAddNode(LogicalUnit): # Distribute updated /etc/hosts and known_hosts to all nodes, # including the node just added - myself = self.cfg.GetNodeInfo(self.sstore.GetMasterNode()) + myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode()) dist_nodes = self.cfg.GetNodeList() if not self.op.readd: dist_nodes.append(node) @@ -1669,91 +1813,18 @@ class LUAddNode(LogicalUnit): logger.Error("copy of file %s to node %s failed" % (fname, to_node)) - to_copy = self.sstore.GetFileList() - if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31: + to_copy = [] + if constants.HT_XEN_HVM in self.cfg.GetClusterInfo().enabled_hypervisors: to_copy.append(constants.VNC_PASSWORD_FILE) for fname in to_copy: result = rpc.call_upload_file([node], fname) if not result[node]: logger.Error("could not copy file %s to node %s" % (fname, node)) - if not self.op.readd: - logger.Info("adding node %s to cluster.conf" % node) - self.cfg.AddNode(new_node) - # Add the new node to the Ganeti Lock Manager - self.context.glm.add(locking.LEVEL_NODE, node) - - -class LUMasterFailover(LogicalUnit): - """Failover the master node to the current node. - - This is a special LU in that it must run on a non-master node. - - """ - HPATH = "master-failover" - HTYPE = constants.HTYPE_CLUSTER - REQ_MASTER = False - REQ_WSSTORE = True - _OP_REQP = [] - - def BuildHooksEnv(self): - """Build hooks env. - - This will run on the new master only in the pre phase, and on all - the nodes in the post phase. - - """ - env = { - "OP_TARGET": self.new_master, - "NEW_MASTER": self.new_master, - "OLD_MASTER": self.old_master, - } - return env, [self.new_master], self.cfg.GetNodeList() - - def CheckPrereq(self): - """Check prerequisites. - - This checks that we are not already the master. - - """ - self.new_master = utils.HostInfo().name - self.old_master = self.sstore.GetMasterNode() - - if self.old_master == self.new_master: - raise errors.OpPrereqError("This commands must be run on the node" - " where you want the new master to be." - " %s is already the master" % - self.old_master) - - def Exec(self, feedback_fn): - """Failover the master node. - - This command, when run on a non-master node, will cause the current - master to cease being master, and the non-master to become new - master. 
- - """ - #TODO: do not rely on gethostname returning the FQDN - logger.Info("setting master to %s, old master: %s" % - (self.new_master, self.old_master)) - - if not rpc.call_node_stop_master(self.old_master): - logger.Error("could disable the master role on the old master" - " %s, please disable manually" % self.old_master) - - ss = self.sstore - ss.SetKey(ss.SS_MASTER_NODE, self.new_master) - if not rpc.call_upload_file(self.cfg.GetNodeList(), - ss.KeyToFilename(ss.SS_MASTER_NODE)): - logger.Error("could not distribute the new simple store master file" - " to the other nodes, please check.") - - if not rpc.call_node_start_master(self.new_master): - logger.Error("could not start the master role on the new master" - " %s, please check" % self.new_master) - feedback_fn("Error in activating the master IP on the new master," - " please fix manually.") - + if self.op.readd: + self.context.ReaddNode(new_node) + else: + self.context.AddNode(new_node) class LUQueryClusterInfo(NoHooksLU): @@ -1778,22 +1849,23 @@ class LUQueryClusterInfo(NoHooksLU): """ result = { - "name": self.sstore.GetClusterName(), + "name": self.cfg.GetClusterName(), "software_version": constants.RELEASE_VERSION, "protocol_version": constants.PROTOCOL_VERSION, "config_version": constants.CONFIG_VERSION, "os_api_version": constants.OS_API_VERSION, "export_version": constants.EXPORT_VERSION, - "master": self.sstore.GetMasterNode(), + "master": self.cfg.GetMasterNode(), "architecture": (platform.architecture()[0], platform.machine()), - "hypervisor_type": self.sstore.GetHypervisorType(), + "hypervisor_type": self.cfg.GetHypervisorType(), + "enabled_hypervisors": self.cfg.GetClusterInfo().enabled_hypervisors, } return result -class LUDumpClusterConfig(NoHooksLU): - """Return a text-representation of the cluster-config. +class LUQueryConfigValues(NoHooksLU): + """Return configuration values. """ _OP_REQP = [] @@ -1802,6 +1874,11 @@ class LUDumpClusterConfig(NoHooksLU): def ExpandNames(self): self.needed_locks = {} + static_fields = ["cluster_name", "master_node"] + _CheckOutputFields(static=static_fields, + dynamic=[], + selected=self.op.output_fields) + def CheckPrereq(self): """No prerequisites. @@ -1812,7 +1889,15 @@ class LUDumpClusterConfig(NoHooksLU): """Dump a representation of the cluster config to the standard output. """ - return self.cfg.DumpConfig() + values = [] + for field in self.op.output_fields: + if field == "cluster_name": + values.append(self.cfg.GetClusterName()) + elif field == "master_node": + values.append(self.cfg.GetMasterNode()) + else: + raise errors.ParameterError(field) + return values class LUActivateInstanceDisks(NoHooksLU): @@ -1820,6 +1905,16 @@ class LUActivateInstanceDisks(NoHooksLU): """ _OP_REQP = ["instance_name"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() def CheckPrereq(self): """Check prerequisites. @@ -1827,26 +1922,22 @@ class LUActivateInstanceDisks(NoHooksLU): This checks that the instance is in the cluster. 
""" - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) - self.instance = instance - + self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert self.instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name def Exec(self, feedback_fn): """Activate the disks. """ - disks_ok, disks_info = _AssembleInstanceDisks(self.instance, self.cfg) + disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance) if not disks_ok: raise errors.OpExecError("Cannot activate block devices") return disks_info -def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False): +def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False): """Prepare the block devices for an instance. This sets up the block devices on all nodes. @@ -1876,7 +1967,7 @@ def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False): # 1st pass, assemble on all nodes in secondary mode for inst_disk in instance.disks: for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): - cfg.SetDiskID(node_disk, node) + lu.cfg.SetDiskID(node_disk, node) result = rpc.call_blockdev_assemble(node, node_disk, iname, False) if not result: logger.Error("could not prepare block device %s on node %s" @@ -1891,7 +1982,7 @@ def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False): for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): if node != instance.primary_node: continue - cfg.SetDiskID(node_disk, node) + lu.cfg.SetDiskID(node_disk, node) result = rpc.call_blockdev_assemble(node, node_disk, iname, True) if not result: logger.Error("could not prepare block device %s on node %s" @@ -1903,19 +1994,19 @@ def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False): # this is a workaround that would be fixed better by # improving the logical/physical id handling for disk in instance.disks: - cfg.SetDiskID(disk, instance.primary_node) + lu.cfg.SetDiskID(disk, instance.primary_node) return disks_ok, device_info -def _StartInstanceDisks(cfg, instance, force): +def _StartInstanceDisks(lu, instance, force): """Start the disks of an instance. """ - disks_ok, dummy = _AssembleInstanceDisks(instance, cfg, + disks_ok, dummy = _AssembleInstanceDisks(lu, instance, ignore_secondaries=force) if not disks_ok: - _ShutdownInstanceDisks(instance, cfg) + _ShutdownInstanceDisks(lu, instance) if force is not None and not force: logger.Error("If the message above refers to a secondary node," " you can retry the operation using '--force'.") @@ -1927,6 +2018,16 @@ class LUDeactivateInstanceDisks(NoHooksLU): """ _OP_REQP = ["instance_name"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() def CheckPrereq(self): """Check prerequisites. @@ -1934,32 +2035,40 @@ class LUDeactivateInstanceDisks(NoHooksLU): This checks that the instance is in the cluster. 
""" - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) - self.instance = instance + self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert self.instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name def Exec(self, feedback_fn): """Deactivate the disks """ instance = self.instance - ins_l = rpc.call_instance_list([instance.primary_node]) - ins_l = ins_l[instance.primary_node] - if not type(ins_l) is list: - raise errors.OpExecError("Can't contact node '%s'" % - instance.primary_node) + _SafeShutdownInstanceDisks(self, instance) + - if self.instance.name in ins_l: - raise errors.OpExecError("Instance is running, can't shutdown" - " block devices.") +def _SafeShutdownInstanceDisks(lu, instance): + """Shutdown block devices of an instance. + + This function checks if an instance is running, before calling + _ShutdownInstanceDisks. + + """ + ins_l = rpc.call_instance_list([instance.primary_node], + [instance.hypervisor]) + ins_l = ins_l[instance.primary_node] + if not type(ins_l) is list: + raise errors.OpExecError("Can't contact node '%s'" % + instance.primary_node) - _ShutdownInstanceDisks(instance, self.cfg) + if instance.name in ins_l: + raise errors.OpExecError("Instance is running, can't shutdown" + " block devices.") + _ShutdownInstanceDisks(lu, instance) -def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False): + +def _ShutdownInstanceDisks(lu, instance, ignore_primary=False): """Shutdown block devices of an instance. This does the shutdown on all nodes of the instance. @@ -1971,7 +2080,7 @@ def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False): result = True for disk in instance.disks: for node, top_disk in disk.ComputeNodeTree(instance.primary_node): - cfg.SetDiskID(top_disk, node) + lu.cfg.SetDiskID(top_disk, node) if not rpc.call_blockdev_shutdown(node, top_disk): logger.Error("could not shutdown block device %s on node %s" % (disk.iv_name, node)) @@ -1980,7 +2089,7 @@ def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False): return result -def _CheckNodeFreeMemory(cfg, node, reason, requested): +def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor): """Checks if a node has enough free memory. This function check if a given node has the needed amount of free @@ -1988,14 +2097,21 @@ def _CheckNodeFreeMemory(cfg, node, reason, requested): information from the node, this function raise an OpPrereqError exception. 
- Args: - - cfg: a ConfigWriter instance - - node: the node name - - reason: string to use in the error message - - requested: the amount of memory in MiB + @type lu: C{LogicalUnit} + @param lu: a logical unit from which we get configuration data + @type node: C{str} + @param node: the node to check + @type reason: C{str} + @param reason: string to use in the error message + @type requested: C{int} + @param requested: the amount of memory in MiB to check for + @type hypervisor: C{str} + @param hypervisor: the hypervisor to ask for memory stats + @raise errors.OpPrereqError: if the node doesn't have enough memory, or + we cannot check the node """ - nodeinfo = rpc.call_node_info([node], cfg.GetVGName()) + nodeinfo = rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor) if not nodeinfo or not isinstance(nodeinfo, dict): raise errors.OpPrereqError("Could not contact node %s for resource" " information" % (node,)) @@ -2017,6 +2133,16 @@ class LUStartupInstance(LogicalUnit): HPATH = "instance-start" HTYPE = constants.HTYPE_INSTANCE _OP_REQP = ["instance_name", "force"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() def BuildHooksEnv(self): """Build hooks env. @@ -2028,7 +2154,7 @@ class LUStartupInstance(LogicalUnit): "FORCE": self.op.force, } env.update(_BuildInstanceHookEnvByObject(self.instance)) - nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] + + nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] + list(self.instance.secondary_nodes)) return env, nl, nl @@ -2038,21 +2164,16 @@ class LUStartupInstance(LogicalUnit): This checks that the instance is in the cluster. """ - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) + self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert self.instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name # check bridges existance - _CheckInstanceBridgesExist(instance) + _CheckInstanceBridgesExist(self, instance) - _CheckNodeFreeMemory(self.cfg, instance.primary_node, + _CheckNodeFreeMemory(self, instance.primary_node, "starting instance %s" % instance.name, - instance.memory) - - self.instance = instance - self.op.instance_name = instance.name + instance.memory, instance.hypervisor) def Exec(self, feedback_fn): """Start the instance. 
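The reworked CheckPrereq above illustrates two conventions this patch introduces: an instance named in the opcode is guaranteed to exist once its lock is held (hence an assert rather than an OpPrereqError), and resource checks are made hypervisor-aware. A caller-side sketch of such a method, assuming cmdlib.py's namespace and the helper signatures visible elsewhere in this diff:

def CheckPrereq(self):
  # _ExpandAndLockInstance already expanded the name and acquired the lock,
  # so a missing instance here is a programming error, not a user error.
  instance = self.cfg.GetInstanceInfo(self.op.instance_name)
  assert instance is not None, \
    "Cannot retrieve locked instance %s" % self.op.instance_name

  # Memory is checked against the instance's own hypervisor rather than a
  # single cluster-wide one; the helper raises OpPrereqError on failure.
  _CheckNodeFreeMemory(self, instance.primary_node,
                       "starting instance %s" % instance.name,
                       instance.memory, instance.hypervisor)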
@@ -2066,10 +2187,10 @@ class LUStartupInstance(LogicalUnit): node_current = instance.primary_node - _StartInstanceDisks(self.cfg, instance, force) + _StartInstanceDisks(self, instance, force) if not rpc.call_instance_start(node_current, instance, extra_args): - _ShutdownInstanceDisks(instance, self.cfg) + _ShutdownInstanceDisks(self, instance) raise errors.OpExecError("Could not start instance") @@ -2080,6 +2201,24 @@ class LURebootInstance(LogicalUnit): HPATH = "instance-reboot" HTYPE = constants.HTYPE_INSTANCE _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"] + REQ_BGL = False + + def ExpandNames(self): + if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT, + constants.INSTANCE_REBOOT_HARD, + constants.INSTANCE_REBOOT_FULL]: + raise errors.ParameterError("reboot type not in [%s, %s, %s]" % + (constants.INSTANCE_REBOOT_SOFT, + constants.INSTANCE_REBOOT_HARD, + constants.INSTANCE_REBOOT_FULL)) + self._ExpandAndLockInstance() + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + primary_only = not constants.INSTANCE_REBOOT_FULL + self._LockInstancesNodes(primary_only=primary_only) def BuildHooksEnv(self): """Build hooks env. @@ -2091,7 +2230,7 @@ class LURebootInstance(LogicalUnit): "IGNORE_SECONDARIES": self.op.ignore_secondaries, } env.update(_BuildInstanceHookEnvByObject(self.instance)) - nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] + + nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] + list(self.instance.secondary_nodes)) return env, nl, nl @@ -2101,17 +2240,12 @@ class LURebootInstance(LogicalUnit): This checks that the instance is in the cluster. """ - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) + self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert self.instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name # check bridges existance - _CheckInstanceBridgesExist(instance) - - self.instance = instance - self.op.instance_name = instance.name + _CheckInstanceBridgesExist(self, instance) def Exec(self, feedback_fn): """Reboot the instance. 
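The reboot-type check moved into ExpandNames above reflects a broader convention in this patch: opcode arguments are validated before any lock is acquired, so malformed requests fail as cheaply as possible. A generic sketch of the idea, with a hypothetical LU and field name, assuming cmdlib.py's namespace:

class LUExampleValidatingOp(NoHooksLU):
  _OP_REQP = ["mode"]
  REQ_BGL = False

  def ExpandNames(self):
    # Reject bad input first: no locks are held yet, so failing here cannot
    # block or delay any other job.
    if self.op.mode not in ("soft", "hard"):
      raise errors.ParameterError("invalid mode %r" % self.op.mode)
    self._ExpandAndLockInstance()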
@@ -2124,14 +2258,6 @@ class LURebootInstance(LogicalUnit): node_current = instance.primary_node - if reboot_type not in [constants.INSTANCE_REBOOT_SOFT, - constants.INSTANCE_REBOOT_HARD, - constants.INSTANCE_REBOOT_FULL]: - raise errors.ParameterError("reboot type not in [%s, %s, %s]" % - (constants.INSTANCE_REBOOT_SOFT, - constants.INSTANCE_REBOOT_HARD, - constants.INSTANCE_REBOOT_FULL)) - if reboot_type in [constants.INSTANCE_REBOOT_SOFT, constants.INSTANCE_REBOOT_HARD]: if not rpc.call_instance_reboot(node_current, instance, @@ -2140,10 +2266,10 @@ class LURebootInstance(LogicalUnit): else: if not rpc.call_instance_shutdown(node_current, instance): raise errors.OpExecError("could not shutdown instance for full reboot") - _ShutdownInstanceDisks(instance, self.cfg) - _StartInstanceDisks(self.cfg, instance, ignore_secondaries) + _ShutdownInstanceDisks(self, instance) + _StartInstanceDisks(self, instance, ignore_secondaries) if not rpc.call_instance_start(node_current, instance, extra_args): - _ShutdownInstanceDisks(instance, self.cfg) + _ShutdownInstanceDisks(self, instance) raise errors.OpExecError("Could not start instance for full reboot") self.cfg.MarkInstanceUp(instance.name) @@ -2156,6 +2282,16 @@ class LUShutdownInstance(LogicalUnit): HPATH = "instance-stop" HTYPE = constants.HTYPE_INSTANCE _OP_REQP = ["instance_name"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() def BuildHooksEnv(self): """Build hooks env. @@ -2164,7 +2300,7 @@ class LUShutdownInstance(LogicalUnit): """ env = _BuildInstanceHookEnvByObject(self.instance) - nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] + + nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] + list(self.instance.secondary_nodes)) return env, nl, nl @@ -2174,12 +2310,9 @@ class LUShutdownInstance(LogicalUnit): This checks that the instance is in the cluster. """ - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) - self.instance = instance + self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert self.instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name def Exec(self, feedback_fn): """Shutdown the instance. @@ -2191,7 +2324,7 @@ class LUShutdownInstance(LogicalUnit): if not rpc.call_instance_shutdown(node_current, instance): logger.Error("could not shutdown instance") - _ShutdownInstanceDisks(instance, self.cfg) + _ShutdownInstanceDisks(self, instance) class LUReinstallInstance(LogicalUnit): @@ -2201,6 +2334,16 @@ class LUReinstallInstance(LogicalUnit): HPATH = "instance-reinstall" HTYPE = constants.HTYPE_INSTANCE _OP_REQP = ["instance_name"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() def BuildHooksEnv(self): """Build hooks env. 
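Much of the mechanical churn in this patch follows a single rule: module-level helpers that used to receive a bare ConfigWriter (cfg/cfgw) now receive the calling LU and reach configuration and logging through it. A schematic before/after with a hypothetical helper name, not code taken from the patch:

# Old convention: helpers got a ConfigWriter and could do little else.
def _ExampleHelper_old(cfg, instance):
  cfg.SetDiskID(instance.disks[0], instance.primary_node)

# New convention: helpers get the LU, so lu.cfg, lu.proc (for logging) and
# the LU's locking state are all reachable from one argument.
def _ExampleHelper(lu, instance):
  lu.cfg.SetDiskID(instance.disks[0], instance.primary_node)
  lu.proc.LogInfo("working on %s" % instance.name)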
@@ -2209,7 +2352,7 @@ class LUReinstallInstance(LogicalUnit): """ env = _BuildInstanceHookEnvByObject(self.instance) - nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] + + nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] + list(self.instance.secondary_nodes)) return env, nl, nl @@ -2219,18 +2362,18 @@ class LUReinstallInstance(LogicalUnit): This checks that the instance is in the cluster and is not running. """ - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) + instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name + if instance.disk_template == constants.DT_DISKLESS: raise errors.OpPrereqError("Instance '%s' has no disks" % self.op.instance_name) if instance.status != "down": raise errors.OpPrereqError("Instance '%s' is marked to be up" % self.op.instance_name) - remote_info = rpc.call_instance_info(instance.primary_node, instance.name) + remote_info = rpc.call_instance_info(instance.primary_node, instance.name, + instance.hypervisor) if remote_info: raise errors.OpPrereqError("Instance '%s' is running on the node %s" % (self.op.instance_name, @@ -2260,9 +2403,9 @@ class LUReinstallInstance(LogicalUnit): if self.op.os_type is not None: feedback_fn("Changing OS to '%s'..." % self.op.os_type) inst.os = self.op.os_type - self.cfg.AddInstance(inst) + self.cfg.Update(inst) - _StartInstanceDisks(self.cfg, inst, None) + _StartInstanceDisks(self, inst, None) try: feedback_fn("Running the instance OS create scripts...") if not rpc.call_instance_os_add(inst.primary_node, inst, "sda", "sdb"): @@ -2270,7 +2413,7 @@ class LUReinstallInstance(LogicalUnit): " on node %s" % (inst.name, inst.primary_node)) finally: - _ShutdownInstanceDisks(inst, self.cfg) + _ShutdownInstanceDisks(self, inst) class LURenameInstance(LogicalUnit): @@ -2289,7 +2432,7 @@ class LURenameInstance(LogicalUnit): """ env = _BuildInstanceHookEnvByObject(self.instance) env["INSTANCE_NEW_NAME"] = self.op.new_name - nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] + + nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] + list(self.instance.secondary_nodes)) return env, nl, nl @@ -2307,7 +2450,8 @@ class LURenameInstance(LogicalUnit): if instance.status != "down": raise errors.OpPrereqError("Instance '%s' is marked to be up" % self.op.instance_name) - remote_info = rpc.call_instance_info(instance.primary_node, instance.name) + remote_info = rpc.call_instance_info(instance.primary_node, instance.name, + instance.hypervisor) if remote_info: raise errors.OpPrereqError("Instance '%s' is running on the node %s" % (self.op.instance_name, @@ -2340,6 +2484,9 @@ class LURenameInstance(LogicalUnit): old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) self.cfg.RenameInstance(inst.name, self.op.new_name) + # Change the instance lock. 
This is definitely safe while we hold the BGL + self.context.glm.remove(locking.LEVEL_INSTANCE, inst.name) + self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name) # re-read the instance from the configuration after rename inst = self.cfg.GetInstanceInfo(self.op.new_name) @@ -2363,16 +2510,16 @@ class LURenameInstance(LogicalUnit): " Ganeti)" % (old_file_storage_dir, new_file_storage_dir)) - _StartInstanceDisks(self.cfg, inst, None) + _StartInstanceDisks(self, inst, None) try: if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name, "sda", "sdb"): - msg = ("Could run OS rename script for instance %s on node %s (but the" - " instance has been renamed in Ganeti)" % + msg = ("Could not run OS rename script for instance %s on node %s" + " (but the instance has been renamed in Ganeti)" % (inst.name, inst.primary_node)) logger.Error(msg) finally: - _ShutdownInstanceDisks(inst, self.cfg) + _ShutdownInstanceDisks(self, inst) class LURemoveInstance(LogicalUnit): @@ -2382,6 +2529,16 @@ class LURemoveInstance(LogicalUnit): HPATH = "instance-remove" HTYPE = constants.HTYPE_INSTANCE _OP_REQP = ["instance_name", "ignore_failures"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() def BuildHooksEnv(self): """Build hooks env. @@ -2390,7 +2547,7 @@ class LURemoveInstance(LogicalUnit): """ env = _BuildInstanceHookEnvByObject(self.instance) - nl = [self.sstore.GetMasterNode()] + nl = [self.cfg.GetMasterNode()] return env, nl, nl def CheckPrereq(self): @@ -2399,12 +2556,9 @@ class LURemoveInstance(LogicalUnit): This checks that the instance is in the cluster. """ - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) - self.instance = instance + self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert self.instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name def Exec(self, feedback_fn): """Remove the instance. @@ -2423,7 +2577,7 @@ class LURemoveInstance(LogicalUnit): logger.Info("removing block devices for instance %s" % instance.name) - if not _RemoveDisks(instance, self.cfg): + if not _RemoveDisks(self, instance): if self.op.ignore_failures: feedback_fn("Warning: can't remove instance's disks") else: @@ -2432,8 +2586,7 @@ class LURemoveInstance(LogicalUnit): logger.Info("removing instance %s out of cluster config" % instance.name) self.cfg.RemoveInstance(instance.name) - # Remove the new instance from the Ganeti Lock Manager - self.context.glm.remove(locking.LEVEL_INSTANCE, instance.name) + self.remove_locks[locking.LEVEL_INSTANCE] = instance.name class LUQueryInstances(NoHooksLU): @@ -2441,39 +2594,77 @@ class LUQueryInstances(NoHooksLU): """ _OP_REQP = ["output_fields", "names"] + REQ_BGL = False - def CheckPrereq(self): - """Check prerequisites. - - This checks that the fields required are valid output fields. 
- - """ + def ExpandNames(self): self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"]) - _CheckOutputFields(static=["name", "os", "pnode", "snodes", - "admin_state", "admin_ram", - "disk_template", "ip", "mac", "bridge", - "sda_size", "sdb_size", "vcpus", "tags"], + self.static_fields = frozenset([ + "name", "os", "pnode", "snodes", + "admin_state", "admin_ram", + "disk_template", "ip", "mac", "bridge", + "sda_size", "sdb_size", "vcpus", "tags", + "network_port", "kernel_path", "initrd_path", + "hvm_boot_order", "hvm_acpi", "hvm_pae", + "hvm_cdrom_image_path", "hvm_nic_type", + "hvm_disk_type", "vnc_bind_address", + "serial_no", "hypervisor", + ]) + _CheckOutputFields(static=self.static_fields, dynamic=self.dynamic_fields, selected=self.op.output_fields) - self.wanted = _GetWantedInstances(self, self.op.names) + self.needed_locks = {} + self.share_locks[locking.LEVEL_INSTANCE] = 1 + self.share_locks[locking.LEVEL_NODE] = 1 - def Exec(self, feedback_fn): - """Computes the list of nodes and their attributes. + if self.op.names: + self.wanted = _GetWantedInstances(self, self.op.names) + else: + self.wanted = locking.ALL_SET + + self.do_locking = not self.static_fields.issuperset(self.op.output_fields) + if self.do_locking: + self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE and self.do_locking: + self._LockInstancesNodes() + + def CheckPrereq(self): + """Check prerequisites. """ - instance_names = self.wanted - instance_list = [self.cfg.GetInstanceInfo(iname) for iname - in instance_names] + pass + + def Exec(self, feedback_fn): + """Computes the list of nodes and their attributes. 
+ + """ + all_info = self.cfg.GetAllInstancesInfo() + if self.do_locking: + instance_names = self.acquired_locks[locking.LEVEL_INSTANCE] + elif self.wanted != locking.ALL_SET: + instance_names = self.wanted + missing = set(instance_names).difference(all_info.keys()) + if missing: + raise errors.OpExecError( + "Some instances were removed before retrieving their data: %s" + % missing) + else: + instance_names = all_info.keys() + instance_list = [all_info[iname] for iname in instance_names] # begin data gathering nodes = frozenset([inst.primary_node for inst in instance_list]) + hv_list = list(set([inst.hypervisor for inst in instance_list])) bad_nodes = [] if self.dynamic_fields.intersection(self.op.output_fields): live_data = {} - node_data = rpc.call_all_instances_info(nodes) + node_data = rpc.call_all_instances_info(nodes, hv_list) for name in nodes: result = node_data[name] if result: @@ -2547,6 +2738,22 @@ class LUQueryInstances(NoHooksLU): val = instance.vcpus elif field == "tags": val = list(instance.GetTags()) + elif field == "serial_no": + val = instance.serial_no + elif field in ("network_port", "kernel_path", "initrd_path", + "hvm_boot_order", "hvm_acpi", "hvm_pae", + "hvm_cdrom_image_path", "hvm_nic_type", + "hvm_disk_type", "vnc_bind_address"): + val = getattr(instance, field, None) + if val is not None: + pass + elif field in ("hvm_nic_type", "hvm_disk_type", + "kernel_path", "initrd_path"): + val = "default" + else: + val = "-" + elif field == "hypervisor": + val = instance.hypervisor else: raise errors.ParameterError(field) iout.append(val) @@ -2562,6 +2769,16 @@ class LUFailoverInstance(LogicalUnit): HPATH = "instance-failover" HTYPE = constants.HTYPE_INSTANCE _OP_REQP = ["instance_name", "ignore_consistency"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() def BuildHooksEnv(self): """Build hooks env. @@ -2573,7 +2790,7 @@ class LUFailoverInstance(LogicalUnit): "IGNORE_CONSISTENCY": self.op.ignore_consistency, } env.update(_BuildInstanceHookEnvByObject(self.instance)) - nl = [self.sstore.GetMasterNode()] + list(self.instance.secondary_nodes) + nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes) return env, nl, nl def CheckPrereq(self): @@ -2582,11 +2799,9 @@ class LUFailoverInstance(LogicalUnit): This checks that the instance is in the cluster. 
""" - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) + self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert self.instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name if instance.disk_template not in constants.DTS_NET_MIRROR: raise errors.OpPrereqError("Instance's disk layout is not" @@ -2599,8 +2814,9 @@ class LUFailoverInstance(LogicalUnit): target_node = secondary_nodes[0] # check memory requirements on the secondary node - _CheckNodeFreeMemory(self.cfg, target_node, "failing over instance %s" % - instance.name, instance.memory) + _CheckNodeFreeMemory(self, target_node, "failing over instance %s" % + instance.name, instance.memory, + instance.hypervisor) # check bridge existance brlist = [nic.bridge for nic in instance.nics] @@ -2609,8 +2825,6 @@ class LUFailoverInstance(LogicalUnit): " exist on destination node '%s'" % (brlist, target_node)) - self.instance = instance - def Exec(self, feedback_fn): """Failover an instance. @@ -2626,7 +2840,7 @@ class LUFailoverInstance(LogicalUnit): feedback_fn("* checking disk consistency between source and target") for dev in instance.disks: # for drbd, these are drbd over lvm - if not _CheckDiskConsistency(self.cfg, dev, target_node, False): + if not _CheckDiskConsistency(self, dev, target_node, False): if instance.status == "up" and not self.op.ignore_consistency: raise errors.OpExecError("Disk %s is degraded on target node," " aborting failover." % dev.iv_name) @@ -2645,7 +2859,7 @@ class LUFailoverInstance(LogicalUnit): (instance.name, source_node)) feedback_fn("* deactivating the instance's disks on source node") - if not _ShutdownInstanceDisks(instance, self.cfg, ignore_primary=True): + if not _ShutdownInstanceDisks(self, instance, ignore_primary=True): raise errors.OpExecError("Can't shut down the instance's disks.") instance.primary_node = target_node @@ -2658,20 +2872,20 @@ class LUFailoverInstance(LogicalUnit): logger.Info("Starting instance %s on node %s" % (instance.name, target_node)) - disks_ok, dummy = _AssembleInstanceDisks(instance, self.cfg, + disks_ok, dummy = _AssembleInstanceDisks(self, instance, ignore_secondaries=True) if not disks_ok: - _ShutdownInstanceDisks(instance, self.cfg) + _ShutdownInstanceDisks(self, instance) raise errors.OpExecError("Can't activate the instance's disks") feedback_fn("* starting the instance on the target node") if not rpc.call_instance_start(target_node, instance, None): - _ShutdownInstanceDisks(instance, self.cfg) + _ShutdownInstanceDisks(self, instance) raise errors.OpExecError("Could not start instance %s on node %s." % (instance.name, target_node)) -def _CreateBlockDevOnPrimary(cfg, node, instance, device, info): +def _CreateBlockDevOnPrimary(lu, node, instance, device, info): """Create a tree of block devices on the primary node. This always creates all devices. 
@@ -2679,10 +2893,10 @@ def _CreateBlockDevOnPrimary(cfg, node, instance, device, info): """ if device.children: for child in device.children: - if not _CreateBlockDevOnPrimary(cfg, node, instance, child, info): + if not _CreateBlockDevOnPrimary(lu, node, instance, child, info): return False - cfg.SetDiskID(device, node) + lu.cfg.SetDiskID(device, node) new_id = rpc.call_blockdev_create(node, device, device.size, instance.name, True, info) if not new_id: @@ -2692,7 +2906,7 @@ def _CreateBlockDevOnPrimary(cfg, node, instance, device, info): return True -def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info): +def _CreateBlockDevOnSecondary(lu, node, instance, device, force, info): """Create a tree of block devices on a secondary node. If this device type has to be created on secondaries, create it and @@ -2705,13 +2919,13 @@ def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info): force = True if device.children: for child in device.children: - if not _CreateBlockDevOnSecondary(cfg, node, instance, + if not _CreateBlockDevOnSecondary(lu, node, instance, child, force, info): return False if not force: return True - cfg.SetDiskID(device, node) + lu.cfg.SetDiskID(device, node) new_id = rpc.call_blockdev_create(node, device, device.size, instance.name, False, info) if not new_id: @@ -2721,7 +2935,7 @@ def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info): return True -def _GenerateUniqueNames(cfg, exts): +def _GenerateUniqueNames(lu, exts): """Generate a suitable LV name. This will generate a logical volume name for the given instance. @@ -2729,29 +2943,33 @@ def _GenerateUniqueNames(cfg, exts): """ results = [] for val in exts: - new_id = cfg.GenerateUniqueID() + new_id = lu.cfg.GenerateUniqueID() results.append("%s%s" % (new_id, val)) return results -def _GenerateDRBD8Branch(cfg, primary, secondary, size, names, iv_name): +def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name, + p_minor, s_minor): """Generate a drbd8 device complete with its children. 
""" - port = cfg.AllocatePort() - vgname = cfg.GetVGName() + port = lu.cfg.AllocatePort() + vgname = lu.cfg.GetVGName() + shared_secret = lu.cfg.GenerateDRBDSecret() dev_data = objects.Disk(dev_type=constants.LD_LV, size=size, logical_id=(vgname, names[0])) dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128, logical_id=(vgname, names[1])) drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size, - logical_id = (primary, secondary, port), - children = [dev_data, dev_meta], + logical_id=(primary, secondary, port, + p_minor, s_minor, + shared_secret), + children=[dev_data, dev_meta], iv_name=iv_name) return drbd_dev -def _GenerateDiskTemplate(cfg, template_name, +def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node, secondary_nodes, disk_sz, swap_sz, file_storage_dir, file_driver): @@ -2760,14 +2978,14 @@ def _GenerateDiskTemplate(cfg, template_name, """ #TODO: compute space requirements - vgname = cfg.GetVGName() + vgname = lu.cfg.GetVGName() if template_name == constants.DT_DISKLESS: disks = [] elif template_name == constants.DT_PLAIN: if len(secondary_nodes) != 0: raise errors.ProgrammerError("Wrong template configuration") - names = _GenerateUniqueNames(cfg, [".sda", ".sdb"]) + names = _GenerateUniqueNames(lu, [".sda", ".sdb"]) sda_dev = objects.Disk(dev_type=constants.LD_LV, size=disk_sz, logical_id=(vgname, names[0]), iv_name = "sda") @@ -2779,12 +2997,18 @@ def _GenerateDiskTemplate(cfg, template_name, if len(secondary_nodes) != 1: raise errors.ProgrammerError("Wrong template configuration") remote_node = secondary_nodes[0] - names = _GenerateUniqueNames(cfg, [".sda_data", ".sda_meta", - ".sdb_data", ".sdb_meta"]) - drbd_sda_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node, - disk_sz, names[0:2], "sda") - drbd_sdb_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node, - swap_sz, names[2:4], "sdb") + (minor_pa, minor_pb, + minor_sa, minor_sb) = lu.cfg.AllocateDRBDMinor( + [primary_node, primary_node, remote_node, remote_node], instance_name) + + names = _GenerateUniqueNames(lu, [".sda_data", ".sda_meta", + ".sdb_data", ".sdb_meta"]) + drbd_sda_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node, + disk_sz, names[0:2], "sda", + minor_pa, minor_sa) + drbd_sdb_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node, + swap_sz, names[2:4], "sdb", + minor_pb, minor_sb) disks = [drbd_sda_dev, drbd_sdb_dev] elif template_name == constants.DT_FILE: if len(secondary_nodes) != 0: @@ -2809,7 +3033,7 @@ def _GetInstanceInfoText(instance): return "originstname+%s" % instance.name -def _CreateDisks(cfg, instance): +def _CreateDisks(lu, instance): """Create all disks for an instance. This abstracts away some work from AddInstance. @@ -2841,13 +3065,13 @@ def _CreateDisks(cfg, instance): (device.iv_name, instance.name)) #HARDCODE for secondary_node in instance.secondary_nodes: - if not _CreateBlockDevOnSecondary(cfg, secondary_node, instance, + if not _CreateBlockDevOnSecondary(lu, secondary_node, instance, device, False, info): logger.Error("failed to create volume %s (%s) on secondary node %s!" % (device.iv_name, device, secondary_node)) return False #HARDCODE - if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, + if not _CreateBlockDevOnPrimary(lu, instance.primary_node, instance, device, info): logger.Error("failed to create volume %s on primary!" % device.iv_name) @@ -2856,7 +3080,7 @@ def _CreateDisks(cfg, instance): return True -def _RemoveDisks(instance, cfg): +def _RemoveDisks(lu, instance): """Remove all disks for an instance. 
This abstracts away some work from `AddInstance()` and @@ -2876,7 +3100,7 @@ def _RemoveDisks(instance, cfg): result = True for device in instance.disks: for node, disk in device.ComputeNodeTree(instance.primary_node): - cfg.SetDiskID(disk, node) + lu.cfg.SetDiskID(disk, node) if not rpc.call_blockdev_remove(node, disk): logger.Error("could not remove block device %s on node %s," " continuing anyway" % @@ -2924,6 +3148,137 @@ class LUCreateInstance(LogicalUnit): _OP_REQP = ["instance_name", "mem_size", "disk_size", "disk_template", "swap_size", "mode", "start", "vcpus", "wait_for_sync", "ip_check", "mac"] + REQ_BGL = False + + def _ExpandNode(self, node): + """Expands and checks one node name. + + """ + node_full = self.cfg.ExpandNodeName(node) + if node_full is None: + raise errors.OpPrereqError("Unknown node %s" % node) + return node_full + + def ExpandNames(self): + """ExpandNames for CreateInstance. + + Figure out the right locks for instance creation. + + """ + self.needed_locks = {} + + # set optional parameters to none if they don't exist + for attr in ["kernel_path", "initrd_path", "pnode", "snode", + "iallocator", "hvm_boot_order", "hvm_acpi", "hvm_pae", + "hvm_cdrom_image_path", "hvm_nic_type", "hvm_disk_type", + "vnc_bind_address", "hypervisor"]: + if not hasattr(self.op, attr): + setattr(self.op, attr, None) + + # cheap checks, mostly valid constants given + + # verify creation mode + if self.op.mode not in (constants.INSTANCE_CREATE, + constants.INSTANCE_IMPORT): + raise errors.OpPrereqError("Invalid instance creation mode '%s'" % + self.op.mode) + + # disk template and mirror node verification + if self.op.disk_template not in constants.DISK_TEMPLATES: + raise errors.OpPrereqError("Invalid disk template name") + + if self.op.hypervisor is None: + self.op.hypervisor = self.cfg.GetHypervisorType() + + enabled_hvs = self.cfg.GetClusterInfo().enabled_hypervisors + if self.op.hypervisor not in enabled_hvs: + raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the" + " cluster (%s)" % (self.op.hypervisor, + ",".join(enabled_hvs))) + + #### instance parameters check + + # instance name verification + hostname1 = utils.HostInfo(self.op.instance_name) + self.op.instance_name = instance_name = hostname1.name + + # this is just a preventive check, but someone might still add this + # instance in the meantime, and creation will fail at lock-add time + if instance_name in self.cfg.GetInstanceList(): + raise errors.OpPrereqError("Instance '%s' is already in the cluster" % + instance_name) + + self.add_locks[locking.LEVEL_INSTANCE] = instance_name + + # ip validity checks + ip = getattr(self.op, "ip", None) + if ip is None or ip.lower() == "none": + inst_ip = None + elif ip.lower() == "auto": + inst_ip = hostname1.ip + else: + if not utils.IsValidIP(ip): + raise errors.OpPrereqError("given IP address '%s' doesn't look" + " like a valid IP" % ip) + inst_ip = ip + self.inst_ip = self.op.ip = inst_ip + # used in CheckPrereq for ip ping check + self.check_ip = hostname1.ip + + # MAC address verification + if self.op.mac != "auto": + if not utils.IsValidMac(self.op.mac.lower()): + raise errors.OpPrereqError("invalid MAC address specified: %s" % + self.op.mac) + + # boot order verification + if self.op.hvm_boot_order is not None: + if len(self.op.hvm_boot_order.strip("acdn")) != 0: + raise errors.OpPrereqError("invalid boot order specified," + " must be one or more of [acdn]") + # file storage checks + if (self.op.file_driver and + not self.op.file_driver in 
constants.FILE_DRIVER): + raise errors.OpPrereqError("Invalid file driver name '%s'" % + self.op.file_driver) + + if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir): + raise errors.OpPrereqError("File storage directory path not absolute") + + ### Node/iallocator related checks + if [self.op.iallocator, self.op.pnode].count(None) != 1: + raise errors.OpPrereqError("One and only one of iallocator and primary" + " node must be given") + + if self.op.iallocator: + self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET + else: + self.op.pnode = self._ExpandNode(self.op.pnode) + nodelist = [self.op.pnode] + if self.op.snode is not None: + self.op.snode = self._ExpandNode(self.op.snode) + nodelist.append(self.op.snode) + self.needed_locks[locking.LEVEL_NODE] = nodelist + + # in case of import lock the source node too + if self.op.mode == constants.INSTANCE_IMPORT: + src_node = getattr(self.op, "src_node", None) + src_path = getattr(self.op, "src_path", None) + + if src_node is None or src_path is None: + raise errors.OpPrereqError("Importing an instance requires source" + " node and path options") + + if not os.path.isabs(src_path): + raise errors.OpPrereqError("The source path must be absolute") + + self.op.src_node = src_node = self._ExpandNode(src_node) + if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET: + self.needed_locks[locking.LEVEL_NODE].append(src_node) + + else: # INSTANCE_CREATE + if getattr(self.op, "os_type", None) is None: + raise errors.OpPrereqError("No guest OS specified") def _RunAllocator(self): """Run the allocator based on input opcode. @@ -2933,7 +3288,7 @@ class LUCreateInstance(LogicalUnit): {"size": self.op.swap_size, "mode": "w"}] nics = [{"mac": self.op.mac, "ip": getattr(self.op, "ip", None), "bridge": self.op.bridge}] - ial = IAllocator(self.cfg, self.sstore, + ial = IAllocator(self.cfg, mode=constants.IALLOCATOR_MODE_ALLOC, name=self.op.instance_name, disk_template=self.op.disk_template, @@ -2954,7 +3309,8 @@ class LUCreateInstance(LogicalUnit): if len(ial.nodes) != ial.required_nodes: raise errors.OpPrereqError("iallocator '%s' returned invalid number" " of nodes (%s), required %s" % - (len(ial.nodes), ial.required_nodes)) + (self.op.iallocator, len(ial.nodes), + ial.required_nodes)) self.op.pnode = ial.nodes[0] logger.ToStdout("Selected nodes for the instance: %s" % (", ".join(ial.nodes),)) @@ -2990,7 +3346,7 @@ class LUCreateInstance(LogicalUnit): nics=[(self.inst_ip, self.op.bridge, self.op.mac)], )) - nl = ([self.sstore.GetMasterNode(), self.op.pnode] + + nl = ([self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries) return env, nl, nl @@ -2999,36 +3355,15 @@ class LUCreateInstance(LogicalUnit): """Check prerequisites. 
""" - # set optional parameters to none if they don't exist - for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode", - "iallocator", "hvm_acpi", "hvm_pae", "hvm_cdrom_image_path", - "vnc_bind_address"]: - if not hasattr(self.op, attr): - setattr(self.op, attr, None) - - if self.op.mode not in (constants.INSTANCE_CREATE, - constants.INSTANCE_IMPORT): - raise errors.OpPrereqError("Invalid instance creation mode '%s'" % - self.op.mode) - if (not self.cfg.GetVGName() and self.op.disk_template not in constants.DTS_NOT_LVM): raise errors.OpPrereqError("Cluster does not support lvm-based" " instances") - if self.op.mode == constants.INSTANCE_IMPORT: - src_node = getattr(self.op, "src_node", None) - src_path = getattr(self.op, "src_path", None) - if src_node is None or src_path is None: - raise errors.OpPrereqError("Importing an instance requires source" - " node and path options") - src_node_full = self.cfg.ExpandNodeName(src_node) - if src_node_full is None: - raise errors.OpPrereqError("Unknown source node '%s'" % src_node) - self.op.src_node = src_node = src_node_full - if not os.path.isabs(src_path): - raise errors.OpPrereqError("The source path must be absolute") + if self.op.mode == constants.INSTANCE_IMPORT: + src_node = self.op.src_node + src_path = self.op.src_path export_info = rpc.call_export_info(src_node, src_path) @@ -3052,52 +3387,17 @@ class LUCreateInstance(LogicalUnit): diskimage = os.path.join(src_path, export_info.get(constants.INISECT_INS, 'disk0_dump')) self.src_image = diskimage - else: # INSTANCE_CREATE - if getattr(self.op, "os_type", None) is None: - raise errors.OpPrereqError("No guest OS specified") - - #### instance parameters check - - # disk template and mirror node verification - if self.op.disk_template not in constants.DISK_TEMPLATES: - raise errors.OpPrereqError("Invalid disk template name") - - # instance name verification - hostname1 = utils.HostInfo(self.op.instance_name) - - self.op.instance_name = instance_name = hostname1.name - instance_list = self.cfg.GetInstanceList() - if instance_name in instance_list: - raise errors.OpPrereqError("Instance '%s' is already in the cluster" % - instance_name) - # ip validity checks - ip = getattr(self.op, "ip", None) - if ip is None or ip.lower() == "none": - inst_ip = None - elif ip.lower() == "auto": - inst_ip = hostname1.ip - else: - if not utils.IsValidIP(ip): - raise errors.OpPrereqError("given IP address '%s' doesn't look" - " like a valid IP" % ip) - inst_ip = ip - self.inst_ip = self.op.ip = inst_ip + # ip ping checks (we use the same ip that was resolved in ExpandNames) if self.op.start and not self.op.ip_check: raise errors.OpPrereqError("Cannot ignore IP address conflicts when" " adding an instance in start mode") if self.op.ip_check: - if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT): + if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT): raise errors.OpPrereqError("IP %s of instance %s already in use" % - (hostname1.ip, instance_name)) - - # MAC address verification - if self.op.mac != "auto": - if not utils.IsValidMac(self.op.mac.lower()): - raise errors.OpPrereqError("invalid MAC address specified: %s" % - self.op.mac) + (self.check_ip, self.op.instance_name)) # bridge verification bridge = getattr(self.op, "bridge", None) @@ -3106,54 +3406,28 @@ class LUCreateInstance(LogicalUnit): else: self.op.bridge = bridge - # boot order verification - if self.op.hvm_boot_order is not None: - if len(self.op.hvm_boot_order.strip("acdn")) != 0: - raise errors.OpPrereqError("invalid 
boot order specified," - " must be one or more of [acdn]") - # file storage checks - if (self.op.file_driver and - not self.op.file_driver in constants.FILE_DRIVER): - raise errors.OpPrereqError("Invalid file driver name '%s'" % - self.op.file_driver) - - if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir): - raise errors.OpPrereqError("File storage directory not a relative" - " path") #### allocator run - if [self.op.iallocator, self.op.pnode].count(None) != 1: - raise errors.OpPrereqError("One and only one of iallocator and primary" - " node must be given") - if self.op.iallocator is not None: self._RunAllocator() #### node related checks # check primary node - pnode = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.pnode)) - if pnode is None: - raise errors.OpPrereqError("Primary node '%s' is unknown" % - self.op.pnode) - self.op.pnode = pnode.name - self.pnode = pnode + self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode) + assert self.pnode is not None, \ + "Cannot retrieve locked node %s" % self.op.pnode self.secondaries = [] # mirror node verification if self.op.disk_template in constants.DTS_NET_MIRROR: - if getattr(self.op, "snode", None) is None: + if self.op.snode is None: raise errors.OpPrereqError("The networked disk templates need" " a mirror node") - - snode_name = self.cfg.ExpandNodeName(self.op.snode) - if snode_name is None: - raise errors.OpPrereqError("Unknown secondary node '%s'" % - self.op.snode) - elif snode_name == pnode.name: + if self.op.snode == pnode.name: raise errors.OpPrereqError("The secondary node cannot be" " the primary node.") - self.secondaries.append(snode_name) + self.secondaries.append(self.op.snode) req_size = _ComputeDiskSize(self.op.disk_template, self.op.disk_size, self.op.swap_size) @@ -3161,7 +3435,8 @@ class LUCreateInstance(LogicalUnit): # Check lv size requirements if req_size is not None: nodenames = [pnode.name] + self.secondaries - nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName()) + nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName(), + self.op.hypervisor) for node in nodenames: info = nodeinfo.get(node, None) if not info: @@ -3185,7 +3460,6 @@ class LUCreateInstance(LogicalUnit): if self.op.kernel_path == constants.VALUE_NONE: raise errors.OpPrereqError("Can't set instance kernel to none") - # bridge check on primary node if not rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]): raise errors.OpPrereqError("target bridge '%s' does not exist on" @@ -3194,12 +3468,13 @@ class LUCreateInstance(LogicalUnit): # memory check on primary node if self.op.start: - _CheckNodeFreeMemory(self.cfg, self.pnode.name, + _CheckNodeFreeMemory(self, self.pnode.name, "creating instance %s" % self.op.instance_name, - self.op.mem_size) + self.op.mem_size, self.op.hypervisor) # hvm_cdrom_image_path verification if self.op.hvm_cdrom_image_path is not None: + # FIXME (als): shouldn't these checks happen on the destination node? 
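# The two checks just below require the CDROM image path to be absolute and
# to point to an existing regular file on the node evaluating the LU (hence
# the FIXME above).  A self-contained sketch of the same test; the helper
# name and the sample path are illustrative only:
import os

def _valid_cdrom_image(path):
    return os.path.isabs(path) and os.path.isfile(path)

assert not _valid_cdrom_image("images/winxp.iso")    # relative path rejected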
if not os.path.isabs(self.op.hvm_cdrom_image_path): raise errors.OpPrereqError("The path to the HVM CDROM image must" " be an absolute path or None, not %s" % @@ -3217,6 +3492,15 @@ class LUCreateInstance(LogicalUnit): " like a valid IP address" % self.op.vnc_bind_address) + # Xen HVM device type checks + if self.op.hypervisor == constants.HT_XEN_HVM: + if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES: + raise errors.OpPrereqError("Invalid NIC type %s specified for Xen HVM" + " hypervisor" % self.op.hvm_nic_type) + if self.op.hvm_disk_type not in constants.HT_HVM_VALID_DISK_TYPES: + raise errors.OpPrereqError("Invalid disk type %s specified for Xen HVM" + " hypervisor" % self.op.hvm_disk_type) + if self.op.start: self.instance_status = 'up' else: @@ -3238,7 +3522,7 @@ class LUCreateInstance(LogicalUnit): if self.inst_ip is not None: nic.ip = self.inst_ip - ht_kind = self.sstore.GetHypervisorType() + ht_kind = self.op.hypervisor if ht_kind in constants.HTS_REQ_PORT: network_port = self.cfg.AllocatePort() else: @@ -3255,11 +3539,11 @@ class LUCreateInstance(LogicalUnit): # build the full file storage dir path file_storage_dir = os.path.normpath(os.path.join( - self.sstore.GetFileStorageDir(), + self.cfg.GetFileStorageDir(), string_file_storage_dir, instance)) - disks = _GenerateDiskTemplate(self.cfg, + disks = _GenerateDiskTemplate(self, self.op.disk_template, instance, pnode_name, self.secondaries, self.op.disk_size, @@ -3282,34 +3566,41 @@ class LUCreateInstance(LogicalUnit): hvm_pae=self.op.hvm_pae, hvm_cdrom_image_path=self.op.hvm_cdrom_image_path, vnc_bind_address=self.op.vnc_bind_address, + hvm_nic_type=self.op.hvm_nic_type, + hvm_disk_type=self.op.hvm_disk_type, + hypervisor=self.op.hypervisor, ) feedback_fn("* creating instance disks...") - if not _CreateDisks(self.cfg, iobj): - _RemoveDisks(iobj, self.cfg) + if not _CreateDisks(self, iobj): + _RemoveDisks(self, iobj) + self.cfg.ReleaseDRBDMinors(instance) raise errors.OpExecError("Device creation failed, reverting...") feedback_fn("adding instance %s to cluster config" % instance) self.cfg.AddInstance(iobj) - # Add the new instance to the Ganeti Lock Manager - self.context.glm.add(locking.LEVEL_INSTANCE, instance) + # Declare that we don't want to remove the instance lock anymore, as we've + # added the instance to the config + del self.remove_locks[locking.LEVEL_INSTANCE] + # Remove the temp. 
assignements for the instance's drbds + self.cfg.ReleaseDRBDMinors(instance) if self.op.wait_for_sync: - disk_abort = not _WaitForSync(self.cfg, iobj, self.proc) + disk_abort = not _WaitForSync(self, iobj) elif iobj.disk_template in constants.DTS_NET_MIRROR: # make sure the disks are not degraded (still sync-ing is ok) time.sleep(15) feedback_fn("* checking mirrors status") - disk_abort = not _WaitForSync(self.cfg, iobj, self.proc, oneshot=True) + disk_abort = not _WaitForSync(self, iobj, oneshot=True) else: disk_abort = False if disk_abort: - _RemoveDisks(iobj, self.cfg) + _RemoveDisks(self, iobj) self.cfg.RemoveInstance(iobj.name) - # Remove the new instance from the Ganeti Lock Manager - self.context.glm.remove(locking.LEVEL_INSTANCE, iobj.name) + # Make sure the instance lock gets removed + self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name raise errors.OpExecError("There are some degraded disks for" " this instance") @@ -3328,8 +3619,9 @@ class LUCreateInstance(LogicalUnit): feedback_fn("* running the instance OS import scripts...") src_node = self.op.src_node src_image = self.src_image + cluster_name = self.cfg.GetClusterName() if not rpc.call_instance_os_import(pnode_name, iobj, "sda", "sdb", - src_node, src_image): + src_node, src_image, cluster_name): raise errors.OpExecError("Could not import os for instance" " %s on node %s" % (instance, pnode_name)) @@ -3376,7 +3668,8 @@ class LUConnectConsole(NoHooksLU): instance = self.instance node = instance.primary_node - node_insts = rpc.call_instance_list([node])[node] + node_insts = rpc.call_instance_list([node], + [instance.hypervisor])[node] if node_insts is False: raise errors.OpExecError("Can't connect to node %s." % node) @@ -3385,7 +3678,7 @@ class LUConnectConsole(NoHooksLU): logger.Debug("connecting to console of %s on %s" % (instance.name, node)) - hyper = hypervisor.GetHypervisor() + hyper = hypervisor.GetHypervisor(instance.hypervisor) console_cmd = hyper.GetShellCommandForConsole(instance) # build ssh cmdline @@ -3399,12 +3692,44 @@ class LUReplaceDisks(LogicalUnit): HPATH = "mirrors-replace" HTYPE = constants.HTYPE_INSTANCE _OP_REQP = ["instance_name", "mode", "disks"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() + + if not hasattr(self.op, "remote_node"): + self.op.remote_node = None + + ia_name = getattr(self.op, "iallocator", None) + if ia_name is not None: + if self.op.remote_node is not None: + raise errors.OpPrereqError("Give either the iallocator or the new" + " secondary, not both") + self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET + elif self.op.remote_node is not None: + remote_node = self.cfg.ExpandNodeName(self.op.remote_node) + if remote_node is None: + raise errors.OpPrereqError("Node '%s' not known" % + self.op.remote_node) + self.op.remote_node = remote_node + self.needed_locks[locking.LEVEL_NODE] = [remote_node] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND + else: + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + # If we're not already locking all nodes in the set we have to declare the + # instance's primary/secondary nodes. + if (level == locking.LEVEL_NODE and + self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET): + self._LockInstancesNodes() def _RunAllocator(self): """Compute a new secondary node using an IAllocator. 
""" - ial = IAllocator(self.cfg, self.sstore, + ial = IAllocator(self.cfg, mode=constants.IALLOCATOR_MODE_RELOC, name=self.op.instance_name, relocate_from=[self.sec_node]) @@ -3436,7 +3761,7 @@ class LUReplaceDisks(LogicalUnit): } env.update(_BuildInstanceHookEnvByObject(self.instance)) nl = [ - self.sstore.GetMasterNode(), + self.cfg.GetMasterNode(), self.instance.primary_node, ] if self.op.remote_node is not None: @@ -3449,16 +3774,10 @@ class LUReplaceDisks(LogicalUnit): This checks that the instance is in the cluster. """ - if not hasattr(self.op, "remote_node"): - self.op.remote_node = None - - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) + instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name self.instance = instance - self.op.instance_name = instance.name if instance.disk_template not in constants.DTS_NET_MIRROR: raise errors.OpPrereqError("Instance's disk layout is not" @@ -3473,18 +3792,13 @@ class LUReplaceDisks(LogicalUnit): ia_name = getattr(self.op, "iallocator", None) if ia_name is not None: - if self.op.remote_node is not None: - raise errors.OpPrereqError("Give either the iallocator or the new" - " secondary, not both") - self.op.remote_node = self._RunAllocator() + self._RunAllocator() remote_node = self.op.remote_node if remote_node is not None: - remote_node = self.cfg.ExpandNodeName(remote_node) - if remote_node is None: - raise errors.OpPrereqError("Node '%s' not known" % - self.op.remote_node) self.remote_node_info = self.cfg.GetNodeInfo(remote_node) + assert self.remote_node_info is not None, \ + "Cannot retrieve locked node %s" % remote_node else: self.remote_node_info = None if remote_node == instance.primary_node: @@ -3525,7 +3839,6 @@ class LUReplaceDisks(LogicalUnit): if instance.FindDisk(name) is None: raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" % (name, instance.name)) - self.op.remote_node = remote_node def _ExecD8DiskOnly(self, feedback_fn): """Replace a disk on the primary or secondary for dbrd8. 
@@ -3582,7 +3895,7 @@ class LUReplaceDisks(LogicalUnit): if not dev.iv_name in self.op.disks: continue info("checking %s consistency on %s" % (dev.iv_name, oth_node)) - if not _CheckDiskConsistency(self.cfg, dev, oth_node, + if not _CheckDiskConsistency(self, dev, oth_node, oth_node==instance.primary_node): raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe" " to replace disks on this node (%s)" % @@ -3596,7 +3909,7 @@ class LUReplaceDisks(LogicalUnit): size = dev.size cfg.SetDiskID(dev, tgt_node) lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]] - names = _GenerateUniqueNames(cfg, lv_names) + names = _GenerateUniqueNames(self, lv_names) lv_data = objects.Disk(dev_type=constants.LD_LV, size=size, logical_id=(vgname, names[0])) lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128, @@ -3610,7 +3923,7 @@ class LUReplaceDisks(LogicalUnit): # _Create...OnPrimary (which forces the creation), even if we # are talking about the secondary node for new_lv in new_lvs: - if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv, + if not _CreateBlockDevOnPrimary(self, tgt_node, instance, new_lv, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new LV named '%s' on" " node '%s'" % @@ -3678,7 +3991,7 @@ class LUReplaceDisks(LogicalUnit): # does a combined result over all disks, so we don't check its # return value self.proc.LogStep(5, steps_total, "sync devices") - _WaitForSync(cfg, instance, self.proc, unlock=True) + _WaitForSync(self, instance, unlock=True) # so check manually all the devices for name, (dev, old_lvs, new_lvs) in iv_names.iteritems(): @@ -3754,7 +4067,7 @@ class LUReplaceDisks(LogicalUnit): if not dev.iv_name in self.op.disks: continue info("checking %s consistency on %s" % (dev.iv_name, pri_node)) - if not _CheckDiskConsistency(self.cfg, dev, pri_node, True, ldisk=True): + if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True): raise errors.OpExecError("Primary node (%s) has degraded storage," " unsafe to replace the secondary" % pri_node) @@ -3768,26 +4081,42 @@ class LUReplaceDisks(LogicalUnit): # _Create...OnPrimary (which forces the creation), even if we # are talking about the secondary node for new_lv in dev.children: - if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv, + if not _CreateBlockDevOnPrimary(self, new_node, instance, new_lv, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new LV named '%s' on" " node '%s'" % (new_lv.logical_id[1], new_node)) - iv_names[dev.iv_name] = (dev, dev.children) + # Step 4: dbrd minors and drbd setups changes + # after this, we must manually remove the drbd minors on both the + # error and the success paths + minors = cfg.AllocateDRBDMinor([new_node for dev in instance.disks], + instance.name) + logging.debug("Allocated minors %s" % (minors,)) self.proc.LogStep(4, steps_total, "changing drbd configuration") - for dev in instance.disks: + for dev, new_minor in zip(instance.disks, minors): size = dev.size info("activating a new drbd on %s for %s" % (new_node, dev.iv_name)) # create new devices on new_node + if pri_node == dev.logical_id[0]: + new_logical_id = (pri_node, new_node, + dev.logical_id[2], dev.logical_id[3], new_minor, + dev.logical_id[5]) + else: + new_logical_id = (new_node, pri_node, + dev.logical_id[2], new_minor, dev.logical_id[4], + dev.logical_id[5]) + iv_names[dev.iv_name] = (dev, dev.children, new_logical_id) + logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor, + 
new_logical_id) new_drbd = objects.Disk(dev_type=constants.LD_DRBD8, - logical_id=(pri_node, new_node, - dev.logical_id[2]), + logical_id=new_logical_id, children=dev.children) - if not _CreateBlockDevOnSecondary(cfg, new_node, instance, + if not _CreateBlockDevOnSecondary(self, new_node, instance, new_drbd, False, - _GetInstanceInfoText(instance)): + _GetInstanceInfoText(instance)): + self.cfg.ReleaseDRBDMinors(instance.name) raise errors.OpExecError("Failed to create new DRBD on" " node '%s'" % new_node) @@ -3803,9 +4132,9 @@ class LUReplaceDisks(LogicalUnit): done = 0 for dev in instance.disks: cfg.SetDiskID(dev, pri_node) - # set the physical (unique in bdev terms) id to None, meaning - # detach from network - dev.physical_id = (None,) * len(dev.physical_id) + # set the network part of the physical (unique in bdev terms) id + # to None, meaning detach from network + dev.physical_id = (None, None, None, None) + dev.physical_id[4:] # and 'find' the device, which will 'fix' it to match the # standalone state if rpc.call_blockdev_find(pri_node, dev): @@ -3816,15 +4145,19 @@ class LUReplaceDisks(LogicalUnit): if not done: # no detaches succeeded (very unlikely) + self.cfg.ReleaseDRBDMinors(instance.name) raise errors.OpExecError("Can't detach at least one DRBD from old node") # if we managed to detach at least one, we update all the disks of # the instance to point to the new secondary info("updating instance configuration") - for dev in instance.disks: - dev.logical_id = (pri_node, new_node) + dev.logical_id[2:] + for dev, _, new_logical_id in iv_names.itervalues(): + dev.logical_id = new_logical_id cfg.SetDiskID(dev, pri_node) cfg.Update(instance) + # we can remove now the temp minors as now the new values are + # written to the config file (and therefore stable) + self.cfg.ReleaseDRBDMinors(instance.name) # and now perform the drbd attach info("attaching primary drbds to new secondary (standalone => connected)") @@ -3835,6 +4168,7 @@ class LUReplaceDisks(LogicalUnit): # it will automatically activate the network, if the physical_id # is correct cfg.SetDiskID(dev, pri_node) + logging.debug("Disk to attach: %s", dev) if not rpc.call_blockdev_find(pri_node, dev): warning("can't attach drbd %s to new secondary!" % dev.iv_name, "please do a gnt-instance info to see the status of disks") @@ -3843,17 +4177,17 @@ class LUReplaceDisks(LogicalUnit): # does a combined result over all disks, so we don't check its # return value self.proc.LogStep(5, steps_total, "sync devices") - _WaitForSync(cfg, instance, self.proc, unlock=True) + _WaitForSync(self, instance, unlock=True) # so check manually all the devices - for name, (dev, old_lvs) in iv_names.iteritems(): + for name, (dev, old_lvs, _) in iv_names.iteritems(): cfg.SetDiskID(dev, pri_node) is_degr = rpc.call_blockdev_find(pri_node, dev)[5] if is_degr: raise errors.OpExecError("DRBD device %s is degraded!" 
% name) self.proc.LogStep(6, steps_total, "removing old storage") - for name, (dev, old_lvs) in iv_names.iteritems(): + for name, (dev, old_lvs, _) in iv_names.iteritems(): info("remove logical volumes for %s" % name) for lv in old_lvs: cfg.SetDiskID(lv, old_node) @@ -3871,8 +4205,7 @@ class LUReplaceDisks(LogicalUnit): # Activate the instance disks if we're replacing them on a down instance if instance.status == "down": - op = opcodes.OpActivateInstanceDisks(instance_name=instance.name) - self.proc.ChainOpCode(op) + _StartInstanceDisks(self, instance, True) if instance.disk_template == constants.DT_DRBD8: if self.op.remote_node is None: @@ -3886,8 +4219,7 @@ class LUReplaceDisks(LogicalUnit): # Deactivate the instance disks if we're replacing them on a down instance if instance.status == "down": - op = opcodes.OpDeactivateInstanceDisks(instance_name=instance.name) - self.proc.ChainOpCode(op) + _SafeShutdownInstanceDisks(self, instance) return ret @@ -3899,6 +4231,16 @@ class LUGrowDisk(LogicalUnit): HPATH = "disk-grow" HTYPE = constants.HTYPE_INSTANCE _OP_REQP = ["instance_name", "disk", "amount"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() def BuildHooksEnv(self): """Build hooks env. @@ -3912,7 +4254,7 @@ class LUGrowDisk(LogicalUnit): } env.update(_BuildInstanceHookEnvByObject(self.instance)) nl = [ - self.sstore.GetMasterNode(), + self.cfg.GetMasterNode(), self.instance.primary_node, ] return env, nl, nl @@ -3923,13 +4265,11 @@ class LUGrowDisk(LogicalUnit): This checks that the instance is in the cluster. 
""" - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) + instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name + self.instance = instance - self.op.instance_name = instance.name if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8): raise errors.OpPrereqError("Instance's disk layout does not support" @@ -3940,7 +4280,8 @@ class LUGrowDisk(LogicalUnit): (self.op.disk, instance.name)) nodenames = [instance.primary_node] + list(instance.secondary_nodes) - nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName()) + nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName(), + instance.hypervisor) for node in nodenames: info = nodeinfo.get(node, None) if not info: @@ -3964,7 +4305,7 @@ class LUGrowDisk(LogicalUnit): for node in (instance.secondary_nodes + (instance.primary_node,)): self.cfg.SetDiskID(disk, node) result = rpc.call_blockdev_grow(node, disk, self.op.amount) - if not result or not isinstance(result, tuple) or len(result) != 2: + if not result or not isinstance(result, (list, tuple)) or len(result) != 2: raise errors.OpExecError("grow request failed to node %s" % node) elif not result[0]: raise errors.OpExecError("grow request failed to node %s: %s" % @@ -3979,6 +4320,34 @@ class LUQueryInstanceData(NoHooksLU): """ _OP_REQP = ["instances"] + REQ_BGL = False + + def ExpandNames(self): + self.needed_locks = {} + self.share_locks = dict(((i, 1) for i in locking.LEVELS)) + + if not isinstance(self.op.instances, list): + raise errors.OpPrereqError("Invalid argument type 'instances'") + + if self.op.instances: + self.wanted_names = [] + for name in self.op.instances: + full_name = self.cfg.ExpandInstanceName(name) + if full_name is None: + raise errors.OpPrereqError("Instance '%s' not known" % + self.op.instance_name) + self.wanted_names.append(full_name) + self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names + else: + self.wanted_names = None + self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET + + self.needed_locks[locking.LEVEL_NODE] = [] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() def CheckPrereq(self): """Check prerequisites. @@ -3986,21 +4355,12 @@ class LUQueryInstanceData(NoHooksLU): This only checks the optional instance list against the existing names. """ - if not isinstance(self.op.instances, list): - raise errors.OpPrereqError("Invalid argument type 'instances'") - if self.op.instances: - self.wanted_instances = [] - names = self.op.instances - for name in names: - instance = self.cfg.GetInstanceInfo(self.cfg.ExpandInstanceName(name)) - if instance is None: - raise errors.OpPrereqError("No such instance name '%s'" % name) - self.wanted_instances.append(instance) - else: - self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name - in self.cfg.GetInstanceList()] - return + if self.wanted_names is None: + self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE] + self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name + in self.wanted_names] + return def _ComputeDiskStatus(self, instance, snode, dev): """Compute block device status. 
@@ -4044,7 +4404,8 @@ class LUQueryInstanceData(NoHooksLU): result = {} for instance in self.wanted_instances: remote_info = rpc.call_instance_info(instance.primary_node, - instance.name) + instance.name, + instance.hypervisor) if remote_info and "state" in remote_info: remote_state = "up" else: @@ -4068,21 +4429,41 @@ class LUQueryInstanceData(NoHooksLU): "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics], "disks": disks, "vcpus": instance.vcpus, + "hypervisor": instance.hypervisor, } - htkind = self.sstore.GetHypervisorType() - if htkind == constants.HT_XEN_PVM30: + htkind = instance.hypervisor + if htkind == constants.HT_XEN_PVM: idict["kernel_path"] = instance.kernel_path idict["initrd_path"] = instance.initrd_path - if htkind == constants.HT_XEN_HVM31: + if htkind == constants.HT_XEN_HVM: idict["hvm_boot_order"] = instance.hvm_boot_order idict["hvm_acpi"] = instance.hvm_acpi idict["hvm_pae"] = instance.hvm_pae idict["hvm_cdrom_image_path"] = instance.hvm_cdrom_image_path + idict["hvm_nic_type"] = instance.hvm_nic_type + idict["hvm_disk_type"] = instance.hvm_disk_type if htkind in constants.HTS_REQ_PORT: - idict["vnc_bind_address"] = instance.vnc_bind_address + if instance.vnc_bind_address is None: + vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS + else: + vnc_bind_address = instance.vnc_bind_address + if instance.network_port is None: + vnc_console_port = None + elif vnc_bind_address == constants.BIND_ADDRESS_GLOBAL: + vnc_console_port = "%s:%s" % (instance.primary_node, + instance.network_port) + elif vnc_bind_address == constants.LOCALHOST_IP_ADDRESS: + vnc_console_port = "%s:%s on node %s" % (vnc_bind_address, + instance.network_port, + instance.primary_node) + else: + vnc_console_port = "%s:%s" % (instance.vnc_bind_address, + instance.network_port) + idict["vnc_console_port"] = vnc_console_port + idict["vnc_bind_address"] = vnc_bind_address idict["network_port"] = instance.network_port result[instance.name] = idict @@ -4097,6 +4478,10 @@ class LUSetInstanceParams(LogicalUnit): HPATH = "instance-modify" HTYPE = constants.HTYPE_INSTANCE _OP_REQP = ["instance_name"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() def BuildHooksEnv(self): """Build hooks env. @@ -4124,7 +4509,7 @@ class LUSetInstanceParams(LogicalUnit): mac = self.instance.nics[0].mac args['nics'] = [(ip, bridge, mac)] env = _BuildInstanceHookEnvByObject(self.instance, override=args) - nl = [self.sstore.GetMasterNode(), + nl = [self.cfg.GetMasterNode(), self.instance.primary_node] + list(self.instance.secondary_nodes) return env, nl, nl @@ -4134,6 +4519,9 @@ class LUSetInstanceParams(LogicalUnit): This only checks the instance list against the existing names. """ + # FIXME: all the parameters could be checked before, in ExpandNames, or in + # a separate CheckArguments function, if we implement one, so the operation + # can be aborted without waiting for any lock, should it have an error... 
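# A self-contained sketch of how the query code above derives the VNC console
# endpoint from the bind address and the network port; the constant values
# are illustrative, the real ones live in ganeti.constants.
VNC_DEFAULT_BIND_ADDRESS = "0.0.0.0"
BIND_ADDRESS_GLOBAL = "0.0.0.0"
LOCALHOST_IP_ADDRESS = "127.0.0.1"


def vnc_console_port(primary_node, bind_address, network_port):
    if bind_address is None:
        bind_address = VNC_DEFAULT_BIND_ADDRESS
    if network_port is None:
        return None
    if bind_address == BIND_ADDRESS_GLOBAL:
        return "%s:%s" % (primary_node, network_port)
    if bind_address == LOCALHOST_IP_ADDRESS:
        return "%s:%s on node %s" % (bind_address, network_port, primary_node)
    return "%s:%s" % (bind_address, network_port)


print(vnc_console_port("node1.example.com", None, 11000))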
self.mem = getattr(self.op, "mem", None) self.vcpus = getattr(self.op, "vcpus", None) self.ip = getattr(self.op, "ip", None) @@ -4144,12 +4532,15 @@ class LUSetInstanceParams(LogicalUnit): self.hvm_boot_order = getattr(self.op, "hvm_boot_order", None) self.hvm_acpi = getattr(self.op, "hvm_acpi", None) self.hvm_pae = getattr(self.op, "hvm_pae", None) + self.hvm_nic_type = getattr(self.op, "hvm_nic_type", None) + self.hvm_disk_type = getattr(self.op, "hvm_disk_type", None) self.hvm_cdrom_image_path = getattr(self.op, "hvm_cdrom_image_path", None) self.vnc_bind_address = getattr(self.op, "vnc_bind_address", None) + self.force = getattr(self.op, "force", None) all_parms = [self.mem, self.vcpus, self.ip, self.bridge, self.mac, self.kernel_path, self.initrd_path, self.hvm_boot_order, self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path, - self.vnc_bind_address] + self.vnc_bind_address, self.hvm_nic_type, self.hvm_disk_type] if all_parms.count(None) == len(all_parms): raise errors.OpPrereqError("No changes submitted") if self.mem is not None: @@ -4211,11 +4602,13 @@ class LUSetInstanceParams(LogicalUnit): # hvm_cdrom_image_path verification if self.op.hvm_cdrom_image_path is not None: - if not os.path.isabs(self.op.hvm_cdrom_image_path): + if not (os.path.isabs(self.op.hvm_cdrom_image_path) or + self.op.hvm_cdrom_image_path.lower() == "none"): raise errors.OpPrereqError("The path to the HVM CDROM image must" " be an absolute path or None, not %s" % self.op.hvm_cdrom_image_path) - if not os.path.isfile(self.op.hvm_cdrom_image_path): + if not (os.path.isfile(self.op.hvm_cdrom_image_path) or + self.op.hvm_cdrom_image_path.lower() == "none"): raise errors.OpPrereqError("The HVM CDROM image must either be a" " regular file or a symlink pointing to" " an existing regular file, not %s" % @@ -4228,13 +4621,54 @@ class LUSetInstanceParams(LogicalUnit): " like a valid IP address" % self.op.vnc_bind_address) - instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) - if instance is None: - raise errors.OpPrereqError("No such instance name '%s'" % - self.op.instance_name) - self.op.instance_name = instance.name - self.instance = instance + instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert self.instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name + self.warn = [] + if self.mem is not None and not self.force: + pnode = self.instance.primary_node + nodelist = [pnode] + nodelist.extend(instance.secondary_nodes) + instance_info = rpc.call_instance_info(pnode, instance.name, + instance.hypervisor) + nodeinfo = rpc.call_node_info(nodelist, self.cfg.GetVGName(), + instance.hypervisor) + + if pnode not in nodeinfo or not isinstance(nodeinfo[pnode], dict): + # Assume the primary node is unreachable and go ahead + self.warn.append("Can't get info from primary node %s" % pnode) + else: + if instance_info: + current_mem = instance_info['memory'] + else: + # Assume instance not running + # (there is a slight race condition here, but it's not very probable, + # and we have no other way to check) + current_mem = 0 + miss_mem = self.mem - current_mem - nodeinfo[pnode]['memory_free'] + if miss_mem > 0: + raise errors.OpPrereqError("This change will prevent the instance" + " from starting, due to %d MB of memory" + " missing on its primary node" % miss_mem) + + for node in instance.secondary_nodes: + if node not in nodeinfo or not isinstance(nodeinfo[node], dict): + self.warn.append("Can't get info from secondary node %s" % 
node) + elif self.mem > nodeinfo[node]['memory_free']: + self.warn.append("Not enough memory to failover instance to secondary" + " node %s" % node) + + # Xen HVM device type checks + if instance.hypervisor == constants.HT_XEN_HVM: + if self.op.hvm_nic_type is not None: + if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES: + raise errors.OpPrereqError("Invalid NIC type %s specified for Xen" + " HVM hypervisor" % self.op.hvm_nic_type) + if self.op.hvm_disk_type is not None: + if self.op.hvm_disk_type not in constants.HT_HVM_VALID_DISK_TYPES: + raise errors.OpPrereqError("Invalid disk type %s specified for Xen" + " HVM hypervisor" % self.op.hvm_disk_type) + return def Exec(self, feedback_fn): @@ -4242,6 +4676,11 @@ class LUSetInstanceParams(LogicalUnit): All parameters take effect only at the next restart of the instance. """ + # Process here the warnings from CheckPrereq, as we don't have a + # feedback_fn there. + for warn in self.warn: + feedback_fn("WARNING: %s" % warn) + result = [] instance = self.instance if self.mem: @@ -4271,20 +4710,29 @@ class LUSetInstanceParams(LogicalUnit): else: instance.hvm_boot_order = self.hvm_boot_order result.append(("hvm_boot_order", self.hvm_boot_order)) - if self.hvm_acpi: + if self.hvm_acpi is not None: instance.hvm_acpi = self.hvm_acpi result.append(("hvm_acpi", self.hvm_acpi)) - if self.hvm_pae: + if self.hvm_pae is not None: instance.hvm_pae = self.hvm_pae result.append(("hvm_pae", self.hvm_pae)) + if self.hvm_nic_type is not None: + instance.hvm_nic_type = self.hvm_nic_type + result.append(("hvm_nic_type", self.hvm_nic_type)) + if self.hvm_disk_type is not None: + instance.hvm_disk_type = self.hvm_disk_type + result.append(("hvm_disk_type", self.hvm_disk_type)) if self.hvm_cdrom_image_path: - instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path + if self.hvm_cdrom_image_path == constants.VALUE_NONE: + instance.hvm_cdrom_image_path = None + else: + instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path result.append(("hvm_cdrom_image_path", self.hvm_cdrom_image_path)) if self.vnc_bind_address: instance.vnc_bind_address = self.vnc_bind_address result.append(("vnc_bind_address", self.vnc_bind_address)) - self.cfg.AddInstance(instance) + self.cfg.Update(instance) return result @@ -4293,13 +4741,23 @@ class LUQueryExports(NoHooksLU): """Query the exports list """ - _OP_REQP = [] + _OP_REQP = ['nodes'] + REQ_BGL = False + + def ExpandNames(self): + self.needed_locks = {} + self.share_locks[locking.LEVEL_NODE] = 1 + if not self.op.nodes: + self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET + else: + self.needed_locks[locking.LEVEL_NODE] = \ + _GetWantedNodes(self, self.op.nodes) def CheckPrereq(self): - """Check that the nodelist contains only existing nodes. + """Check prerequisites. """ - self.nodes = _GetWantedNodes(self, getattr(self.op, "nodes", None)) + self.nodes = self.acquired_locks[locking.LEVEL_NODE] def Exec(self, feedback_fn): """Compute the list of all the exported system images. @@ -4320,6 +4778,23 @@ class LUExportInstance(LogicalUnit): HPATH = "instance-export" HTYPE = constants.HTYPE_INSTANCE _OP_REQP = ["instance_name", "target_node", "shutdown"] + REQ_BGL = False + + def ExpandNames(self): + self._ExpandAndLockInstance() + # FIXME: lock only instance primary and destination node + # + # Sad but true, for now we have do lock all nodes, as we don't know where + # the previous export might be, and and in this LU we search for it and + # remove it from its current node. 
In the future we could fix this by: + # - making a tasklet to search (share-lock all), then create the new one, + # then one to remove, after + # - removing the removal operation altoghether + self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET + + def DeclareLocks(self, level): + """Last minute lock declaration.""" + # All nodes are locked anyway, so nothing to do here. def BuildHooksEnv(self): """Build hooks env. @@ -4332,7 +4807,7 @@ class LUExportInstance(LogicalUnit): "EXPORT_DO_SHUTDOWN": self.op.shutdown, } env.update(_BuildInstanceHookEnvByObject(self.instance)) - nl = [self.sstore.GetMasterNode(), self.instance.primary_node, + nl = [self.cfg.GetMasterNode(), self.instance.primary_node, self.op.target_node] return env, nl, nl @@ -4342,20 +4817,16 @@ class LUExportInstance(LogicalUnit): This checks that the instance and node names are valid. """ - instance_name = self.cfg.ExpandInstanceName(self.op.instance_name) + instance_name = self.op.instance_name self.instance = self.cfg.GetInstanceInfo(instance_name) - if self.instance is None: - raise errors.OpPrereqError("Instance '%s' not found" % - self.op.instance_name) + assert self.instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name - # node verification - dst_node_short = self.cfg.ExpandNodeName(self.op.target_node) - self.dst_node = self.cfg.GetNodeInfo(dst_node_short) + self.dst_node = self.cfg.GetNodeInfo( + self.cfg.ExpandNodeName(self.op.target_node)) - if self.dst_node is None: - raise errors.OpPrereqError("Destination node '%s' is unknown." % - self.op.target_node) - self.op.target_node = self.dst_node.name + assert self.dst_node is not None, \ + "Cannot retrieve locked node %s" % self.op.target_node # instance disk type verification for disk in self.instance.disks: @@ -4373,8 +4844,8 @@ class LUExportInstance(LogicalUnit): if self.op.shutdown: # shutdown the instance, but not the disks if not rpc.call_instance_shutdown(src_node, instance): - raise errors.OpExecError("Could not shutdown instance %s on node %s" % - (instance.name, src_node)) + raise errors.OpExecError("Could not shutdown instance %s on node %s" % + (instance.name, src_node)) vgname = self.cfg.GetVGName() @@ -4399,13 +4870,15 @@ class LUExportInstance(LogicalUnit): finally: if self.op.shutdown and instance.status == "up": if not rpc.call_instance_start(src_node, instance, None): - _ShutdownInstanceDisks(instance, self.cfg) + _ShutdownInstanceDisks(self, instance) raise errors.OpExecError("Could not start instance") # TODO: check for size + cluster_name = self.cfg.GetClusterName() for dev in snap_disks: - if not rpc.call_snapshot_export(src_node, dev, dst_node.name, instance): + if not rpc.call_snapshot_export(src_node, dev, dst_node.name, + instance, cluster_name): logger.Error("could not export block device %s from node %s to node %s" % (dev.logical_id[1], src_node, dst_node.name)) if not rpc.call_blockdev_remove(src_node, dev): @@ -4423,8 +4896,7 @@ class LUExportInstance(LogicalUnit): # if we proceed the backup would be removed because OpQueryExports # substitutes an empty list with the full cluster node list. 
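# The cleanup just below asks every remaining node for its export list and
# removes any stale export of this instance.  A self-contained sketch of the
# same idea over a made-up export listing (names are illustrative only):
def remove_stale_exports(instance_name, exportlist, remove_fn):
    removed = []
    for node, exports in exportlist.items():
        if instance_name in exports:
            remove_fn(node, instance_name)
            removed.append(node)
    return removed


print(remove_stale_exports("inst1.example.com",
                           {"node1.example.com": ["inst1.example.com"],
                            "node2.example.com": []},
                           lambda node, name: None))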
if nodelist: - op = opcodes.OpQueryExports(nodes=nodelist) - exportlist = self.proc.ChainOpCode(op) + exportlist = rpc.call_export_list(nodelist) for node in exportlist: if instance.name in exportlist[node]: if not rpc.call_export_remove(node, instance.name): @@ -4437,6 +4909,14 @@ class LURemoveExport(NoHooksLU): """ _OP_REQP = ["instance_name"] + REQ_BGL = False + + def ExpandNames(self): + self.needed_locks = {} + # We need all nodes to be locked in order for RemoveExport to work, but we + # don't need to lock the instance itself, as nothing will happen to it (and + # we can remove exports also for a removed instance) + self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET def CheckPrereq(self): """Check prerequisites. @@ -4455,8 +4935,7 @@ class LURemoveExport(NoHooksLU): fqdn_warn = True instance_name = self.op.instance_name - op = opcodes.OpQueryExports(nodes=[]) - exportlist = self.proc.ChainOpCode(op) + exportlist = rpc.call_export_list(self.acquired_locks[locking.LEVEL_NODE]) found = False for node in exportlist: if instance_name in exportlist[node]: @@ -4477,26 +4956,34 @@ class TagsLU(NoHooksLU): This is an abstract class which is the parent of all the other tags LUs. """ - def CheckPrereq(self): - """Check prerequisites. - """ - if self.op.kind == constants.TAG_CLUSTER: - self.target = self.cfg.GetClusterInfo() - elif self.op.kind == constants.TAG_NODE: + def ExpandNames(self): + self.needed_locks = {} + if self.op.kind == constants.TAG_NODE: name = self.cfg.ExpandNodeName(self.op.name) if name is None: raise errors.OpPrereqError("Invalid node name (%s)" % (self.op.name,)) self.op.name = name - self.target = self.cfg.GetNodeInfo(name) + self.needed_locks[locking.LEVEL_NODE] = name elif self.op.kind == constants.TAG_INSTANCE: name = self.cfg.ExpandInstanceName(self.op.name) if name is None: raise errors.OpPrereqError("Invalid instance name (%s)" % (self.op.name,)) self.op.name = name - self.target = self.cfg.GetInstanceInfo(name) + self.needed_locks[locking.LEVEL_INSTANCE] = name + + def CheckPrereq(self): + """Check prerequisites. + + """ + if self.op.kind == constants.TAG_CLUSTER: + self.target = self.cfg.GetClusterInfo() + elif self.op.kind == constants.TAG_NODE: + self.target = self.cfg.GetNodeInfo(self.op.name) + elif self.op.kind == constants.TAG_INSTANCE: + self.target = self.cfg.GetInstanceInfo(self.op.name) else: raise errors.OpPrereqError("Wrong tag type requested (%s)" % str(self.op.kind)) @@ -4507,12 +4994,13 @@ class LUGetTags(TagsLU): """ _OP_REQP = ["kind", "name"] + REQ_BGL = False def Exec(self, feedback_fn): """Returns the tag list. """ - return self.target.GetTags() + return list(self.target.GetTags()) class LUSearchTags(NoHooksLU): @@ -4520,6 +5008,10 @@ class LUSearchTags(NoHooksLU): """ _OP_REQP = ["pattern"] + REQ_BGL = False + + def ExpandNames(self): + self.needed_locks = {} def CheckPrereq(self): """Check prerequisites. 
@@ -4539,9 +5031,9 @@ class LUSearchTags(NoHooksLU): """ cfg = self.cfg tgts = [("/cluster", cfg.GetClusterInfo())] - ilist = [cfg.GetInstanceInfo(name) for name in cfg.GetInstanceList()] + ilist = cfg.GetAllInstancesInfo().values() tgts.extend([("/instances/%s" % i.name, i) for i in ilist]) - nlist = [cfg.GetNodeInfo(name) for name in cfg.GetNodeList()] + nlist = cfg.GetAllNodesInfo().values() tgts.extend([("/nodes/%s" % n.name, n) for n in nlist]) results = [] for path, target in tgts: @@ -4556,6 +5048,7 @@ class LUAddTags(TagsLU): """ _OP_REQP = ["kind", "name", "tags"] + REQ_BGL = False def CheckPrereq(self): """Check prerequisites. @@ -4589,6 +5082,7 @@ class LUDelTags(TagsLU): """ _OP_REQP = ["kind", "name", "tags"] + REQ_BGL = False def CheckPrereq(self): """Check prerequisites. @@ -4672,7 +5166,7 @@ class IAllocator(object): """IAllocator framework. An IAllocator instance has three sets of attributes: - - cfg/sstore that are needed to query the cluster + - cfg that is needed to query the cluster - input data (all members of the _KEYS class attribute are required) - four buffer attributes (in|out_data|text), that represent the input (to the external script) in text and data structure format, @@ -4689,9 +5183,8 @@ class IAllocator(object): "relocate_from", ] - def __init__(self, cfg, sstore, mode, name, **kwargs): + def __init__(self, cfg, mode, name, **kwargs): self.cfg = cfg - self.sstore = sstore # init buffer variables self.in_text = self.out_text = self.in_data = self.out_data = None # init all input fields so that pylint is happy @@ -4729,12 +5222,13 @@ class IAllocator(object): """ cfg = self.cfg + cluster_info = cfg.GetClusterInfo() # cluster data data = { "version": 1, - "cluster_name": self.sstore.GetClusterName(), - "cluster_tags": list(cfg.GetClusterInfo().GetTags()), - "hypervisor_type": self.sstore.GetHypervisorType(), + "cluster_name": self.cfg.GetClusterName(), + "cluster_tags": list(cluster_info.GetTags()), + "enable_hypervisors": list(cluster_info.enabled_hypervisors), # we don't have job IDs } @@ -4743,7 +5237,10 @@ class IAllocator(object): # node data node_results = {} node_list = cfg.GetNodeList() - node_data = rpc.call_node_info(node_list, cfg.GetVGName()) + # FIXME: here we have only one hypervisor information, but + # instance can belong to different hypervisors + node_data = rpc.call_node_info(node_list, cfg.GetVGName(), + cfg.GetHypervisorType()) for nname in node_list: ninfo = cfg.GetNodeInfo(nname) if nname not in node_data or not isinstance(node_data[nname], dict): @@ -4799,6 +5296,7 @@ class IAllocator(object): "nics": nic_data, "disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks], "disk_template": iinfo.disk_template, + "hypervisor": iinfo.hypervisor, } instance_data[iinfo.name] = pir @@ -4897,9 +5395,9 @@ class IAllocator(object): """ data = self.in_text - result = call_fn(self.sstore.GetMasterNode(), name, self.in_text) + result = call_fn(self.cfg.GetMasterNode(), name, self.in_text) - if not isinstance(result, tuple) or len(result) != 4: + if not isinstance(result, (list, tuple)) or len(result) != 4: raise errors.OpExecError("Invalid result from master iallocator runner") rcode, stdout, stderr, fail = result @@ -4907,9 +5405,8 @@ class IAllocator(object): if rcode == constants.IARUN_NOTFOUND: raise errors.OpExecError("Can't find allocator '%s'" % name) elif rcode == constants.IARUN_FAILURE: - raise errors.OpExecError("Instance allocator call failed: %s," - " output: %s" % - (fail, stdout+stderr)) + raise errors.OpExecError("Instance 
allocator call failed: %s," + " output: %s" % (fail, stdout+stderr)) self.out_text = stdout if validate: self._ValidateResult() @@ -5011,7 +5508,7 @@ class LUTestAllocator(NoHooksLU): """ if self.op.mode == constants.IALLOCATOR_MODE_ALLOC: - ial = IAllocator(self.cfg, self.sstore, + ial = IAllocator(self.cfg, mode=self.op.mode, name=self.op.name, mem_size=self.op.mem_size, @@ -5023,7 +5520,7 @@ class LUTestAllocator(NoHooksLU): vcpus=self.op.vcpus, ) else: - ial = IAllocator(self.cfg, self.sstore, + ial = IAllocator(self.cfg, mode=self.op.mode, name=self.op.name, relocate_from=list(self.relocate_from),