def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
memory, vcpus, nics, disk_template, disks,
- bep, hvp, hypervisor_name):
+ bep, hvp, hypervisor_name, tags):
"""Builds instance related env variables for hooks
This builds the hook environment from individual variables.
@param hvp: the hypervisor parameters for the instance
@type hypervisor_name: string
@param hypervisor_name: the hypervisor for the instance
+ @type tags: list
+ @param tags: list of instance tags as strings
@rtype: dict
@return: the hook environment for this instance
env["INSTANCE_DISK_COUNT"] = disk_count
+ if not tags:
+ tags = []
+
+ env["INSTANCE_TAGS"] = " ".join(tags)
+
for source, kind in [(bep, "BE"), (hvp, "HV")]:
for key, value in source.items():
env["INSTANCE_%s_%s" % (kind, key)] = value
'bep': bep,
'hvp': hvp,
'hypervisor_name': instance.hypervisor,
+ 'tags': instance.tags,
}
if override:
args.update(override)
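# Editorial illustration, not part of the patch: how the new "tags" argument
# is expected to surface in the hook environment (values are made up).
def _tags_env_sketch(tags):
  # Mirror the guard above: a missing/None tag list yields an empty string
  if not tags:
    tags = []
  return {"INSTANCE_TAGS": " ".join(tags)}

assert _tags_env_sketch(["web", "production"]) == \
    {"INSTANCE_TAGS": "web production"}
assert _tags_env_sketch(None) == {"INSTANCE_TAGS": ""}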
def _VerifyCertificate(filename):
- """Verifies a certificate for LUClusterVerifyConfig.
+ """Verifies a certificate for L{LUClusterVerifyConfig}.
@type filename: string
@param filename: Path to PEM file
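# Editorial sketch of the kind of check such a verifier performs, using
# pyOpenSSL directly; the helper name is hypothetical and this is not the
# exact code path the LU uses.
import OpenSSL

def _pem_cert_expired_sketch(filename):
  pem = open(filename).read()
  cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
  # has_expired() compares the certificate's notAfter field against now
  return cert.has_expired()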
ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
+ ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
+ ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
"""Verifies the cluster config.
"""
-
- REQ_BGL = False
+ REQ_BGL = True
def _VerifyHVP(self, hvp_data):
"""Verifies locally the syntax of the hypervisor parameters.
self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
def ExpandNames(self):
+ # Information can be safely retrieved as the BGL is acquired in exclusive
+ # mode
self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
+ self.all_node_info = self.cfg.GetAllNodesInfo()
self.all_inst_info = self.cfg.GetAllInstancesInfo()
self.needed_locks = {}
self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
self.all_inst_info.values()))
+ feedback_fn("* Verifying all nodes belong to an existing group")
+
+ # We do this verification here because, should this bogus circumstance
+ # occur, it would never be caught by VerifyGroup, which only acts on
+ # nodes/instances reachable from existing node groups.
+
+ dangling_nodes = set(node.name for node in self.all_node_info.values()
+ if node.group not in self.all_group_info)
+
+ dangling_instances = {}
+ no_node_instances = []
+
+ for inst in self.all_inst_info.values():
+ if inst.primary_node in dangling_nodes:
+ dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
+ elif inst.primary_node not in self.all_node_info:
+ no_node_instances.append(inst.name)
+
+ pretty_dangling = [
+ "%s (%s)" %
+ (node_name,
+ utils.CommaJoin(dangling_instances.get(node_name,
+ ["no instances"])))
+ for node_name in dangling_nodes]
+
+ self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
+ "the following nodes (and their instances) belong to a non"
+ " existing group: %s", utils.CommaJoin(pretty_dangling))
+
+ self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
+ "the following instances have a non-existing primary-node:"
+ " %s", utils.CommaJoin(no_node_instances))
+
return (not self.bad, [g.name for g in self.all_group_info.values()])
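# Toy data (made-up names) showing what the dangling checks above flag:
groups_sketch = {"uuid-1": "default"}              # known node groups
node_to_group = {"node1": "uuid-1",
                 "node2": "uuid-gone"}             # node2's group is missing
dangling_sketch = set(n for (n, g) in node_to_group.items()
                      if g not in groups_sketch)
assert dangling_sketch == set(["node2"])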
"""Verifies the status of a node group.
"""
-
HPATH = "cluster-verify"
HTYPE = constants.HTYPE_CLUSTER
REQ_BGL = False
# This raises errors.OpPrereqError on its own:
self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
- all_node_info = self.cfg.GetAllNodesInfo()
- all_inst_info = self.cfg.GetAllInstancesInfo()
-
- node_names = set(node.name
- for node in all_node_info.values()
- if node.group == self.group_uuid)
-
- inst_names = [inst.name
- for inst in all_inst_info.values()
- if inst.primary_node in node_names]
+ # Get instances in node group; this is unsafe and needs verification later
+ inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
self.needed_locks = {
- locking.LEVEL_NODEGROUP: [self.group_uuid],
- locking.LEVEL_NODE: list(node_names),
locking.LEVEL_INSTANCE: inst_names,
- }
+ locking.LEVEL_NODEGROUP: [self.group_uuid],
+ locking.LEVEL_NODE: [],
+ }
self.share_locks = dict.fromkeys(locking.LEVELS, 1)
- def CheckPrereq(self):
- self.all_node_info = self.cfg.GetAllNodesInfo()
- self.all_inst_info = self.cfg.GetAllInstancesInfo()
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ # Get members of node group; this is unsafe and needs verification later
+ nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
+
+ all_inst_info = self.cfg.GetAllInstancesInfo()
- group_nodes = set(node.name
- for node in self.all_node_info.values()
- if node.group == self.group_uuid)
+ # In Exec(), we warn about mirrored instances that have primary and
+ # secondary living in separate node groups. To fully verify that
+ # volumes for these instances are healthy, we will need to do an
+ # extra call to their secondaries. We ensure here those nodes will
+ # be locked.
+ for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
+ # Important: access only the instances whose lock is owned
+ if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
+ nodes.update(all_inst_info[inst].secondary_nodes)
- group_instances = set(inst.name
- for inst in self.all_inst_info.values()
- if inst.primary_node in group_nodes)
+ self.needed_locks[locking.LEVEL_NODE] = nodes
+
+ def CheckPrereq(self):
+ group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
+ group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
unlocked_nodes = \
group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
unlocked_instances = \
group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
if unlocked_nodes:
- raise errors.OpPrereqError("missing lock for nodes: %s" %
+ raise errors.OpPrereqError("Missing lock for nodes: %s" %
utils.CommaJoin(unlocked_nodes))
if unlocked_instances:
- raise errors.OpPrereqError("missing lock for instances: %s" %
+ raise errors.OpPrereqError("Missing lock for instances: %s" %
utils.CommaJoin(unlocked_instances))
+ self.all_node_info = self.cfg.GetAllNodesInfo()
+ self.all_inst_info = self.cfg.GetAllInstancesInfo()
+
self.my_node_names = utils.NiceSort(group_nodes)
self.my_inst_names = utils.NiceSort(group_instances)
self.my_inst_info = dict((name, self.all_inst_info[name])
for name in self.my_inst_names)
+ # We detect here the nodes that will need the extra RPC calls for verifying
+ # split LV volumes; they must already be locked at this point.
+ extra_lv_nodes = set()
+
+ for inst in self.my_inst_info.values():
+ if inst.disk_template in constants.DTS_INT_MIRROR:
+ group = self.my_node_info[inst.primary_node].group
+ for nname in inst.secondary_nodes:
+ if self.all_node_info[nname].group != group:
+ extra_lv_nodes.add(nname)
+
+ unlocked_lv_nodes = \
+ extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
+
+ if unlocked_lv_nodes:
+ raise errors.OpPrereqError("Missing node locks for LV verification: %s" %
+ utils.CommaJoin(unlocked_lv_nodes))
+ self.extra_lv_nodes = list(extra_lv_nodes)
+
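# The lock checks above reduce to plain set arithmetic; a sketch with
# made-up names:
needed_sketch = set(["node1", "node2", "node3"])  # nodes the LU must inspect
owned_sketch = set(["node1", "node3"])            # locks actually acquired
missing_sketch = needed_sketch.difference(owned_sketch)
assert missing_sketch == set(["node2"])           # would raise OpPrereqError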
def _VerifyNode(self, ninfo, nresult):
"""Perform some basic validation on data returned from a node.
for nname in inst_config.all_nodes:
if nname not in node_image:
- # ghost node
gnode = self.NodeImage(name=nname)
- gnode.ghost = True
+ gnode.ghost = (nname not in self.all_node_info)
node_image[nname] = gnode
inst_config.MapLVsByNode(node_vol_should)
self.cfg.GetClusterName())
nvinfo_endtime = time.time()
+ if self.extra_lv_nodes and vg_name is not None:
+ extra_lv_nvinfo = \
+ self.rpc.call_node_verify(self.extra_lv_nodes,
+ {constants.NV_LVLIST: vg_name},
+ self.cfg.GetClusterName())
+ else:
+ extra_lv_nvinfo = {}
+
all_drbd_map = self.cfg.ComputeDRBDMap()
feedback_fn("* Gathering disk information (%s nodes)" %
_ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
"node is running unknown instance %s", inst)
+ for node, result in extra_lv_nvinfo.items():
+ self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
+ node_image[node], vg_name)
+
feedback_fn("* Verifying instance status")
for instance in self.my_inst_names:
if verbose:
feedback_fn("* Verifying orphan volumes")
reserved = utils.FieldSet(*cluster.reserved_lvs)
+
+ # We will get spurious "unknown volume" warnings if any node of this group
+ # is secondary for an instance whose primary is in another group. To avoid
+ # them, we find these instances and add their volumes to node_vol_should.
+ for inst in self.all_inst_info.values():
+ for secondary in inst.secondary_nodes:
+ if (secondary in self.my_node_info
+ and inst.name not in self.my_inst_info):
+ inst.MapLVsByNode(node_vol_should)
+ break
+
self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
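# Toy version (made-up names) of the orphan check this block protects:
# "nodeA" is secondary for an out-of-group instance, so its LVs must be
# listed in node_vol_should or they would be reported as orphans.
vol_should_sketch = {"nodeA": set(["lv-inst1-data"])}
vols_found_sketch = set(["lv-inst1-data"])
assert not vols_found_sketch - vol_should_sketch["nodeA"]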
if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
REQ_BGL = False
def CheckArguments(self):
- _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
+ pass
def ExpandNames(self):
self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
- self.needed_locks = {}
-
- # Create tasklets for migrating instances for all instances on this node
- names = []
- tasklets = []
-
- self.lock_all_nodes = False
-
- for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
- logging.debug("Migrating instance %s", inst.name)
- names.append(inst.name)
-
- tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False))
-
- if inst.disk_template in constants.DTS_EXT_MIRROR:
- # We need to lock all nodes, as the iallocator will choose the
- # destination nodes afterwards
- self.lock_all_nodes = True
-
- self.tasklets = tasklets
-
- # Declare node locks
- if self.lock_all_nodes:
- self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
- else:
- self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
-
- # Declare instance locks
- self.needed_locks[locking.LEVEL_INSTANCE] = names
-
- def DeclareLocks(self, level):
- if level == locking.LEVEL_NODE and not self.lock_all_nodes:
- self._LockInstancesNodes()
+ self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+ self.needed_locks = {
+ locking.LEVEL_NODE: [self.op.node_name],
+ }
def BuildHooksEnv(self):
"""Build hooks env.
nl = [self.cfg.GetMasterNode()]
return (nl, nl)
+ def CheckPrereq(self):
+ pass
+
+ def Exec(self, feedback_fn):
+ # Prepare one migration job per primary instance on this node
+ jobs = [
+ [opcodes.OpInstanceMigrate(instance_name=inst.name,
+ mode=self.op.mode,
+ live=self.op.live,
+ iallocator=self.op.iallocator,
+ target_node=self.op.target_node)]
+ for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
+ ]
+
+ # TODO: Run iallocator in this opcode and pass correct placement options to
+ # OpInstanceMigrate. Since other jobs can modify the cluster between
+ # running the iallocator and the actual migration, a good consistency model
+ # will have to be found.
+
+ assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
+ frozenset([self.op.node_name]))
+
+ return ResultWithJobs(jobs)
+
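# Editorial illustration: the returned value is a list of jobs, each job a
# list of opcodes. One single-opcode job per instance lets every migration
# be scheduled and fail independently (instance names are made up).
names_sketch = ["inst1.example.com", "inst2.example.com"]
jobs_sketch = [[("OP_INSTANCE_MIGRATE", name)] for name in names_sketch]
assert all(len(job) == 1 for job in jobs_sketch)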
class TLMigrateInstance(Tasklet):
"""Tasklet class for instance migration.
mode=constants.IALLOCATOR_MODE_ALLOC,
name=self.op.instance_name,
disk_template=self.op.disk_template,
- tags=[],
+ tags=self.op.tags,
os=self.op.os_type,
vcpus=self.be_full[constants.BE_VCPUS],
- mem_size=self.be_full[constants.BE_MEMORY],
+ memory=self.be_full[constants.BE_MEMORY],
disks=self.disks,
nics=nics,
hypervisor=self.op.hypervisor,
bep=self.be_full,
hvp=self.hv_full,
hypervisor_name=self.op.hypervisor,
+ tags=self.op.tags,
))
return env
nics.append(ndict)
self.op.nics = nics
+ if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
+ self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
+
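# The export file stores tags as a single whitespace-separated string; a
# hypothetical "tags = web production" entry would round-trip like this:
assert "web production".split() == ["web", "production"]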
if (self.op.hypervisor is None and
einfo.has_option(constants.INISECT_INS, "hypervisor")):
self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
+
if einfo.has_section(constants.INISECT_HYP):
# use the export parameters but do not override the ones
# specified by the user
",".join(enabled_hvs)),
errors.ECODE_STATE)
+ # Check tag validity
+ for tag in self.op.tags:
+ objects.TaggableObject.ValidateTag(tag)
+
# check hypervisor parameter syntax (locally)
utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
osparams=self.op.osparams,
)
+ if self.op.tags:
+ for tag in self.op.tags:
+ iobj.AddTag(tag)
+
if self.adopt_disks:
if self.op.disk_template == constants.DT_PLAIN:
# rename LVs to the newly-generated names; we need to construct
locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
def Exec(self, feedback_fn):
+ instances = []
+ for node in self.op.nodes:
+ instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
+ if not instances:
+ return []
+
if self.op.remote_node is not None:
- instances = []
- for node in self.op.nodes:
- instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
result = []
for i in instances:
if i.primary_node == self.op.remote_node:
self.in_text = self.out_text = self.in_data = self.out_data = None
# init all input fields so that pylint is happy
self.mode = mode
- self.mem_size = self.disks = self.disk_template = None
+ self.memory = self.disks = self.disk_template = None
self.os = self.tags = self.nics = self.vcpus = None
self.hypervisor = None
self.relocate_from = None
self.name = None
self.evac_nodes = None
self.instances = None
- self.reloc_mode = None
- self.target_groups = None
+ self.evac_mode = None
+ self.target_groups = []
# computed fields
self.required_nodes = None
# init result fields
self.success = self.info = self.result = None
try:
- (fn, keyset, self._result_check) = self._MODE_DATA[self.mode]
+ (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
except KeyError:
raise errors.ProgrammerError("Unknown mode '%s' passed to the"
" IAllocator" % self.mode)
+ keyset = [n for (n, _) in keydata]
+
for key in kwargs:
if key not in keyset:
raise errors.ProgrammerError("Invalid input parameter '%s' to"
if key not in kwargs:
raise errors.ProgrammerError("Missing input parameter '%s' to"
" IAllocator" % key)
- self._BuildInputData(compat.partial(fn, self))
+ self._BuildInputData(compat.partial(fn, self), keydata)
def _ComputeClusterData(self):
"""Compute the generic allocator input data.
hypervisor_name = self.hypervisor
elif self.mode == constants.IALLOCATOR_MODE_RELOC:
hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
- elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
- constants.IALLOCATOR_MODE_MRELOC):
+ else:
hypervisor_name = cluster_info.enabled_hypervisors[0]
node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
"tags": self.tags,
"os": self.os,
"vcpus": self.vcpus,
- "memory": self.mem_size,
+ "memory": self.memory,
"disks": self.disks,
"disk_space_total": disk_space,
"nics": self.nics,
"required_nodes": self.required_nodes,
+ "hypervisor": self.hypervisor,
}
return request
}
return request
- def _AddMultiRelocate(self):
- """Get data for multi-relocate requests.
+ def _AddNodeEvacuate(self):
+ """Get data for node-evacuate requests.
+
+ """
+ return {
+ "instances": self.instances,
+ "evac_mode": self.evac_mode,
+ }
+
+ def _AddChangeGroup(self):
+ """Get data for node-evacuate requests.
"""
return {
"instances": self.instances,
- "reloc_mode": self.reloc_mode,
"target_groups": self.target_groups,
}
- def _BuildInputData(self, fn):
+ def _BuildInputData(self, fn, keydata):
"""Build input data structures.
"""
request = fn()
request["type"] = self.mode
+ for keyname, keytype in keydata:
+ if keyname not in request:
+ raise errors.ProgrammerError("Request parameter %s is missing" %
+ keyname)
+ val = request[keyname]
+ if not keytype(val):
+ raise errors.ProgrammerError("Request parameter %s doesn't pass"
+ " validation, value %s, expected"
+ " type %s" % (keyname, val, keytype))
self.in_data["request"] = request
self.in_text = serializer.Dump(self.in_data)
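# Editorial sketch of the (name, validator) pairs used above: validators
# such as ht.TInt or ht.TString are plain callables returning a boolean.
# The data below is made up.
keydata_sketch = [("name", lambda v: isinstance(v, str)),
                  ("memory", lambda v: isinstance(v, int))]
request_sketch = {"name": "inst1.example.com", "memory": 128}
for keyname, keytype in keydata_sketch:
  assert keyname in request_sketch         # else: ProgrammerError (missing)
  assert keytype(request_sketch[keyname])  # else: ProgrammerError (bad type)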
+ _STRING_LIST = ht.TListOf(ht.TString)
+ _JOBSET_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
+ # pylint: disable-msg=E1101
+ # Class '...' has no 'OP_ID' member
+ "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
+ opcodes.OpInstanceMigrate.OP_ID,
+ opcodes.OpInstanceReplaceDisks.OP_ID])
+ })))
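# Illustrative result accepted by _JOBSET_LIST: a list of jobs, each a list
# of opcode dicts whose OP_ID is one of the three allowed opcodes; extra
# keys pass because the strict-dict check is not exclusive.
jobset_sketch = [
  [{"OP_ID": "OP_INSTANCE_MIGRATE", "instance_name": "inst1"}],
  [{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", "instance_name": "inst2"}],
]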
_MODE_DATA = {
constants.IALLOCATOR_MODE_ALLOC:
(_AddNewInstance,
- ["name", "mem_size", "disks", "disk_template", "os", "tags", "nics",
- "vcpus", "hypervisor"], ht.TList),
+ [
+ ("name", ht.TString),
+ ("memory", ht.TInt),
+ ("disks", ht.TListOf(ht.TDict)),
+ ("disk_template", ht.TString),
+ ("os", ht.TString),
+ ("tags", _STRING_LIST),
+ ("nics", ht.TListOf(ht.TDict)),
+ ("vcpus", ht.TInt),
+ ("hypervisor", ht.TString),
+ ], ht.TList),
constants.IALLOCATOR_MODE_RELOC:
- (_AddRelocateInstance, ["name", "relocate_from"], ht.TList),
+ (_AddRelocateInstance,
+ [("name", ht.TString), ("relocate_from", _STRING_LIST)],
+ ht.TList),
constants.IALLOCATOR_MODE_MEVAC:
- (_AddEvacuateNodes, ["evac_nodes"],
- ht.TListOf(ht.TAnd(ht.TIsLength(2),
- ht.TListOf(ht.TString)))),
- constants.IALLOCATOR_MODE_MRELOC:
- (_AddMultiRelocate, ["instances", "reloc_mode", "target_groups"],
- ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
- # pylint: disable-msg=E1101
- # Class '...' has no 'OP_ID' member
- "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
- opcodes.OpInstanceMigrate.OP_ID,
- opcodes.OpInstanceReplaceDisks.OP_ID])
- })))),
+ (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
+ ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
+ constants.IALLOCATOR_MODE_NODE_EVAC:
+ (_AddNodeEvacuate, [
+ ("instances", _STRING_LIST),
+ ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
+ ], _JOBSET_LIST),
+ constants.IALLOCATOR_MODE_CHG_GROUP:
+ (_AddChangeGroup, [
+ ("instances", _STRING_LIST),
+ ("target_groups", _STRING_LIST),
+ ], _JOBSET_LIST),
}
def Run(self, name, validate=True, call_fn=None):
"""
if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
- for attr in ["mem_size", "disks", "disk_template",
+ for attr in ["memory", "disks", "disk_template",
"os", "tags", "nics", "vcpus"]:
if not hasattr(self.op, attr):
raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
errors.ECODE_INVAL)
for row in self.op.disks:
if (not isinstance(row, dict) or
- "size" not in row or
- not isinstance(row["size"], int) or
- "mode" not in row or
- row["mode"] not in ['r', 'w']):
+ constants.IDISK_SIZE not in row or
+ not isinstance(row[constants.IDISK_SIZE], int) or
+ constants.IDISK_MODE not in row or
+ row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
raise errors.OpPrereqError("Invalid contents of the 'disks'"
" parameter", errors.ECODE_INVAL)
if self.op.hypervisor is None:
if not hasattr(self.op, "evac_nodes"):
raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
" opcode input", errors.ECODE_INVAL)
- elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
- if self.op.instances:
- self.op.instances = _GetWantedInstances(self, self.op.instances)
- else:
- raise errors.OpPrereqError("Missing instances to relocate",
- errors.ECODE_INVAL)
+ elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
+ constants.IALLOCATOR_MODE_NODE_EVAC):
+ if not self.op.instances:
+ raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
+ self.op.instances = _GetWantedInstances(self, self.op.instances)
else:
raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
self.op.mode, errors.ECODE_INVAL)
ial = IAllocator(self.cfg, self.rpc,
mode=self.op.mode,
name=self.op.name,
- mem_size=self.op.mem_size,
+ memory=self.op.memory,
disks=self.op.disks,
disk_template=self.op.disk_template,
os=self.op.os,
ial = IAllocator(self.cfg, self.rpc,
mode=self.op.mode,
evac_nodes=self.op.evac_nodes)
- elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
+ elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
ial = IAllocator(self.cfg, self.rpc,
mode=self.op.mode,
instances=self.op.instances,
- reloc_mode=self.op.reloc_mode,
target_groups=self.op.target_groups)
+ elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
+ ial = IAllocator(self.cfg, self.rpc,
+ mode=self.op.mode,
+ instances=self.op.instances,
+ evac_mode=self.op.evac_mode)
else:
raise errors.ProgrammerError("Unhandled mode %s in"
" LUTestAllocator.Exec", self.op.mode)