ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
+ ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
+ ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
"""Verifies the cluster config.
"""
-
REQ_BGL = False
def _VerifyHVP(self, hvp_data):
def ExpandNames(self):
self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
+ self.all_node_info = self.cfg.GetAllNodesInfo()
self.all_inst_info = self.cfg.GetAllInstancesInfo()
self.needed_locks = {}
self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
self.all_inst_info.values()))
+ feedback_fn("* Verifying all nodes belong to an existing group")
+
+ # We do this verification here because, should this bogus circumstance
+ # occur, it would never be caught by VerifyGroup, which only acts on
+ # nodes/instances reachable from existing node groups.
+
+ dangling_nodes = set(node.name for node in self.all_node_info.values()
+ if node.group not in self.all_group_info)
+
+ dangling_instances = {}
+ no_node_instances = []
+
+ for inst in self.all_inst_info.values():
+ if inst.primary_node in dangling_nodes:
+ dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
+ elif inst.primary_node not in self.all_node_info:
+ no_node_instances.append(inst.name)
+
+ pretty_dangling = [
+ "%s (%s)" %
+ (node.name,
+ utils.CommaJoin(dangling_instances.get(node.name,
+ ["no instances"])))
+ for node in dangling_nodes]
+
+ self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
+ "the following nodes (and their instances) belong to a non"
+ " existing group: %s", utils.CommaJoin(pretty_dangling))
+
+ self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
+ "the following instances have a non-existing primary-node:"
+ " %s", utils.CommaJoin(no_node_instances))
+
return (not self.bad, [g.name for g in self.all_group_info.values()])
"""Verifies the status of a node group.
"""
-
HPATH = "cluster-verify"
HTYPE = constants.HTYPE_CLUSTER
REQ_BGL = False
for inst in all_inst_info.values()
if inst.primary_node in node_names]
+ # In Exec(), we warn about mirrored instances that have primary and
+ # secondary living in separate node groups. To fully verify that
+ # volumes for these instances are healthy, we will need to do an
+ # extra call to their secondaries. We ensure here those nodes will
+ # be locked.
+ for inst in inst_names:
+ if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
+ node_names.update(all_inst_info[inst].secondary_nodes)
+
self.needed_locks = {
locking.LEVEL_NODEGROUP: [self.group_uuid],
locking.LEVEL_NODE: list(node_names),
self.my_inst_info = dict((name, self.all_inst_info[name])
for name in self.my_inst_names)
+ # We detect here the nodes that will need the extra RPC calls for verifying
+ # split LV volumes; they should be locked.
+ extra_lv_nodes = set()
+
+ for inst in self.my_inst_info.values():
+ if inst.disk_template in constants.DTS_INT_MIRROR:
+ group = self.my_node_info[inst.primary_node].group
+ for nname in inst.secondary_nodes:
+ if self.all_node_info[nname].group != group:
+ extra_lv_nodes.add(nname)
+
+ unlocked_lv_nodes = \
+ extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
+
+ if unlocked_lv_nodes:
+ raise errors.OpPrereqError("these nodes could be locked: %s" %
+ utils.CommaJoin(unlocked_lv_nodes))
+ self.extra_lv_nodes = list(extra_lv_nodes)
+
def _VerifyNode(self, ninfo, nresult):
"""Perform some basic validation on data returned from a node.
master_ip = self.cfg.GetMasterIP()
feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
+
+ # We will make nodes contact all nodes in their group, and one node from
+ # every other group.
+ # TODO: should it be a *random* node, different every time?
+ online_nodes = [node.name for node in node_data_list if not node.offline]
+ other_group_nodes = {}
+
+ for name in sorted(self.all_node_info):
+ node = self.all_node_info[name]
+ if (node.group not in other_group_nodes
+ and node.group != self.group_uuid
+ and not node.offline):
+ other_group_nodes[node.group] = node.name
+
node_verify_param = {
constants.NV_FILELIST:
utils.UniqueSequence(filename
for files in filemap
for filename in files),
- constants.NV_NODELIST: [node.name for node in self.all_node_info.values()
- if not node.offline],
+ constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
constants.NV_HYPERVISOR: hypervisors,
constants.NV_HVPARAMS:
_GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
for nname in inst_config.all_nodes:
if nname not in node_image:
- # ghost node
gnode = self.NodeImage(name=nname)
- gnode.ghost = True
+ gnode.ghost = (nname not in self.all_node_info)
node_image[nname] = gnode
inst_config.MapLVsByNode(node_vol_should)
all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
node_verify_param,
self.cfg.GetClusterName())
+ if self.extra_lv_nodes and vg_name is not None:
+ extra_lv_nvinfo = \
+ self.rpc.call_node_verify(self.extra_lv_nodes,
+ {constants.NV_LVLIST: vg_name},
+ self.cfg.GetClusterName())
+ else:
+ extra_lv_nvinfo = {}
nvinfo_endtime = time.time()
all_drbd_map = self.cfg.ComputeDRBDMap()
_ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
"node is running unknown instance %s", inst)
+ for node, result in extra_lv_nvinfo.items():
+ self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
+ node_image[node], vg_name)
+
feedback_fn("* Verifying instance status")
for instance in self.my_inst_names:
if verbose:
feedback_fn("* Verifying orphan volumes")
reserved = utils.FieldSet(*cluster.reserved_lvs)
+
+ # We will get spurious "unknown volume" warnings if any node of this group
+ # is secondary for an instance whose primary is in another group. To avoid
+ # them, we find these instances and add their volumes to node_vol_should.
+ for inst in self.all_inst_info.values():
+ for secondary in inst.secondary_nodes:
+ if (secondary in self.my_node_info
+ and inst.name not in self.my_inst_info):
+ inst.MapLVsByNode(node_vol_should)
+ break
+
self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks: