gnt-debug: rename allocator to iallocator
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 01845be..98a056c 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -1332,6 +1332,8 @@ class _VerifyErrors(object):
   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
+  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
+  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
@@ -1404,7 +1406,6 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
   """Verifies the cluster config.
 
   """
-
   REQ_BGL = False
 
   def _VerifyHVP(self, hvp_data):
@@ -1423,6 +1424,7 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
 
   def ExpandNames(self):
     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
+    self.all_node_info = self.cfg.GetAllNodesInfo()
     self.all_inst_info = self.cfg.GetAllInstancesInfo()
     self.needed_locks = {}
 
@@ -1449,6 +1451,39 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                 self.all_inst_info.values()))
 
+    feedback_fn("* Verifying all nodes belong to an existing group")
+
+    # We do this verification here because, should this bogus circumstance
+    # occur, it would never be caught by VerifyGroup, which only acts on
+    # nodes/instances reachable from existing node groups.
+
+    dangling_nodes = set(node.name for node in self.all_node_info.values()
+                         if node.group not in self.all_group_info)
+
+    dangling_instances = {}
+    no_node_instances = []
+
+    for inst in self.all_inst_info.values():
+      if inst.primary_node in dangling_nodes:
+        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
+      elif inst.primary_node not in self.all_node_info:
+        no_node_instances.append(inst.name)
+
+    pretty_dangling = [
+        "%s (%s)" %
+        (node_name,
+         utils.CommaJoin(dangling_instances.get(node_name,
+                                                ["no instances"])))
+        for node_name in dangling_nodes]
+
+    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
+                  "the following nodes (and their instances) belong to a"
+                  " non-existing group: %s", utils.CommaJoin(pretty_dangling))
+
+    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
+                  "the following instances have a non-existing primary node:"
+                  " %s", utils.CommaJoin(no_node_instances))
+
     return (not self.bad, [g.name for g in self.all_group_info.values()])
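
Editor's note: the dangling-node and dangling-instance bookkeeping above only needs the node-to-group and instance-to-primary-node mappings from the configuration. A minimal standalone sketch of the same logic, with plain dicts standing in for the configuration objects (all names here are made up):

    # Hypothetical stand-ins for cfg.GetAllNodeGroupsInfo() & friends.
    all_group_info = {"uuid-group-1": "default"}
    all_node_info = {"node1": "uuid-group-1",      # node name -> group UUID
                     "node2": "uuid-group-gone"}
    all_inst_info = {"inst1": "node1",             # instance -> primary node
                     "inst2": "node2",
                     "inst3": "node-gone"}

    dangling_nodes = set(name for name, group in all_node_info.items()
                         if group not in all_group_info)

    dangling_instances = {}
    no_node_instances = []
    for inst, pnode in all_inst_info.items():
      if pnode in dangling_nodes:
        dangling_instances.setdefault(pnode, []).append(inst)
      elif pnode not in all_node_info:
        no_node_instances.append(inst)

    print(dangling_nodes)      # -> {'node2'}: node in a vanished group
    print(dangling_instances)  # -> {'node2': ['inst2']}
    print(no_node_instances)   # -> ['inst3']: primary node not in the config
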
 
 
@@ -1456,7 +1491,6 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
   """Verifies the status of a node group.
 
   """
-
   HPATH = "cluster-verify"
   HTYPE = constants.HTYPE_CLUSTER
   REQ_BGL = False
@@ -1528,6 +1562,15 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                   for inst in all_inst_info.values()
                   if inst.primary_node in node_names]
 
+    # In Exec(), we warn about mirrored instances whose primary and
+    # secondary nodes live in separate node groups. To fully verify that
+    # the volumes of these instances are healthy, we will need an extra
+    # RPC call to their secondaries, so we make sure here that those
+    # nodes will be locked.
+    for inst in inst_names:
+      if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
+        node_names.update(all_inst_info[inst].secondary_nodes)
+
     self.needed_locks = {
       locking.LEVEL_NODEGROUP: [self.group_uuid],
       locking.LEVEL_NODE: list(node_names),
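
Editor's note: the extra locking above only applies to internally mirrored disk templates (constants.DTS_INT_MIRROR, e.g. DRBD); other templates have no secondary whose volumes would need checking. A rough standalone sketch of that selection, with made-up instance records:

    DTS_INT_MIRROR = frozenset(["drbd"])   # stand-in for constants.DTS_INT_MIRROR
    instances = {
      "inst1": {"disk_template": "drbd", "secondary_nodes": ["nodeB"]},
      "inst2": {"disk_template": "plain", "secondary_nodes": []},
    }
    node_names = set(["nodeA"])            # nodes of the group being verified
    for inst in instances.values():
      if inst["disk_template"] in DTS_INT_MIRROR:
        node_names.update(inst["secondary_nodes"])
    # node_names is now {'nodeA', 'nodeB'}: the cross-group secondary
    # will be locked as well.
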
@@ -1571,6 +1614,25 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     self.my_inst_info = dict((name, self.all_inst_info[name])
                              for name in self.my_inst_names)
 
+    # Detect the nodes that will need the extra RPC calls for verifying split
+    # LVs; they must already have been locked in ExpandNames.
+    extra_lv_nodes = set()
+
+    for inst in self.my_inst_info.values():
+      if inst.disk_template in constants.DTS_INT_MIRROR:
+        group = self.my_node_info[inst.primary_node].group
+        for nname in inst.secondary_nodes:
+          if self.all_node_info[nname].group != group:
+            extra_lv_nodes.add(nname)
+
+    unlocked_lv_nodes = \
+        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
+
+    if unlocked_lv_nodes:
+      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
+                                 utils.CommaJoin(unlocked_lv_nodes))
+    self.extra_lv_nodes = list(extra_lv_nodes)
+
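
Editor's note: a standalone sketch of the split-secondary detection and the lock sanity check above, with a name-to-group dict standing in for the node objects and a plain set for the owned node locks (both hypothetical):

    DTS_INT_MIRROR = frozenset(["drbd"])
    node_group = {"nodeA": "g1", "nodeB": "g1", "nodeC": "g2"}   # name -> group
    instances = [
      {"disk_template": "drbd", "primary_node": "nodeA",
       "secondary_nodes": ["nodeC"]},
    ]

    extra_lv_nodes = set()
    for inst in instances:
      if inst["disk_template"] in DTS_INT_MIRROR:
        group = node_group[inst["primary_node"]]
        for nname in inst["secondary_nodes"]:
          if node_group[nname] != group:
            extra_lv_nodes.add(nname)        # nodeC lives in another group

    owned_node_locks = set(["nodeA", "nodeB", "nodeC"])  # acquired in ExpandNames
    unlocked = extra_lv_nodes - owned_node_locks
    assert not unlocked, "Missing node locks: %s" % ", ".join(sorted(unlocked))
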
   def _VerifyNode(self, ninfo, nresult):
     """Perform some basic validation on data returned from a node.
 
@@ -2387,13 +2449,26 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     master_ip = self.cfg.GetMasterIP()
 
     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
+
+    # We will make nodes contact all nodes in their group, and one node from
+    # every other group.
+    # TODO: should it be a *random* node, different every time?
+    online_nodes = [node.name for node in node_data_list if not node.offline]
+    other_group_nodes = {}
+
+    for name in sorted(self.all_node_info):
+      node = self.all_node_info[name]
+      if (node.group not in other_group_nodes
+          and node.group != self.group_uuid
+          and not node.offline):
+        other_group_nodes[node.group] = node.name
+
     node_verify_param = {
       constants.NV_FILELIST:
         utils.UniqueSequence(filename
                              for files in filemap
                              for filename in files),
-      constants.NV_NODELIST: [node.name for node in self.all_node_info.values()
-                              if not node.offline],
+      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
       constants.NV_HYPERVISOR: hypervisors,
       constants.NV_HVPARAMS:
         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
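
Editor's note: the connectivity-target list built above is every online node of the group being verified plus one online node per foreign group (the first by sorted name, since the loop iterates sorted(self.all_node_info)). A toy version with made-up node data:

    current_group = "g1"
    nodes = {                      # name -> (group, offline flag)
      "node1": ("g1", False),
      "node2": ("g1", True),
      "node3": ("g2", False),
      "node4": ("g2", False),
      "node5": ("g3", False),
    }

    online_nodes = [name for name, (group, offline) in sorted(nodes.items())
                    if group == current_group and not offline]
    other_group_nodes = {}
    for name in sorted(nodes):
      group, offline = nodes[name]
      if (group not in other_group_nodes
          and group != current_group
          and not offline):
        other_group_nodes[group] = name

    targets = online_nodes + list(other_group_nodes.values())
    # targets -> ['node1', 'node3', 'node5']: node2 is offline, node4 is
    # skipped because g2 already has a representative.
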
@@ -2455,9 +2530,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
       for nname in inst_config.all_nodes:
         if nname not in node_image:
-          # ghost node
           gnode = self.NodeImage(name=nname)
-          gnode.ghost = True
+          gnode.ghost = (nname not in self.all_node_info)
           node_image[nname] = gnode
 
       inst_config.MapLVsByNode(node_vol_should)
@@ -2483,6 +2557,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                            node_verify_param,
                                            self.cfg.GetClusterName())
+    if self.extra_lv_nodes and vg_name is not None:
+      extra_lv_nvinfo = \
+          self.rpc.call_node_verify(self.extra_lv_nodes,
+                                    {constants.NV_LVLIST: vg_name},
+                                    self.cfg.GetClusterName())
+    else:
+      extra_lv_nvinfo = {}
     nvinfo_endtime = time.time()
 
     all_drbd_map = self.cfg.ComputeDRBDMap()
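
Editor's note: the extra call above is deliberately minimal, asking those nodes only for their LV list, and only when the group actually uses an LVM volume group. A hedged sketch of the call plus the later merge into the per-node data; rpc_call and update_node_volumes are hypothetical stand-ins for self.rpc.call_node_verify and self._UpdateNodeVolumes:

    NV_LVLIST = "lvlist"   # stand-in for constants.NV_LVLIST

    def verify_extra_lv_nodes(extra_lv_nodes, vg_name, cluster_name,
                              rpc_call, update_node_volumes):
      """Ask split secondaries only for their LVs, then merge the results."""
      if extra_lv_nodes and vg_name is not None:
        extra_lv_nvinfo = rpc_call(extra_lv_nodes, {NV_LVLIST: vg_name},
                                   cluster_name)
      else:
        extra_lv_nvinfo = {}
      for node, result in extra_lv_nvinfo.items():
        update_node_volumes(node, result, vg_name)
      return extra_lv_nvinfo
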
@@ -2588,6 +2669,10 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
           _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                    "node is running unknown instance %s", inst)
 
+    for node, result in extra_lv_nvinfo.items():
+      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
+                              node_image[node], vg_name)
+
     feedback_fn("* Verifying instance status")
     for instance in self.my_inst_names:
       if verbose:
@@ -2666,6 +2751,17 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     feedback_fn("* Verifying orphan volumes")
     reserved = utils.FieldSet(*cluster.reserved_lvs)
+
+    # We will get spurious "unknown volume" warnings if any node of this group
+    # is secondary for an instance whose primary is in another group. To avoid
+    # them, we find these instances and add their volumes to node_vol_should.
+    for inst in self.all_inst_info.values():
+      for secondary in inst.secondary_nodes:
+        if (secondary in self.my_node_info
+            and inst.name not in self.my_inst_info):
+          inst.MapLVsByNode(node_vol_should)
+          break
+
     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
 
     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
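
Editor's note: the pre-pass above adds the expected LVs of instances whose primary lives in another group but whose secondary sits in this one, so those LVs are not reported as orphans. A standalone sketch of that filtering, with made-up instance records and a dict-based stand-in for inst.MapLVsByNode():

    my_node_info = {"nodeB": None}     # nodes of the group being verified
    my_inst_info = {}                  # instances whose primary is in this group
    all_instances = [
      {"name": "inst1", "secondary_nodes": ["nodeB"],
       "lvs_by_node": {"nodeB": ["xenvg/inst1-disk0"]}},  # expected LVs per node
    ]

    node_vol_should = {}
    for inst in all_instances:
      for secondary in inst["secondary_nodes"]:
        if secondary in my_node_info and inst["name"] not in my_inst_info:
          for node, lvs in inst["lvs_by_node"].items():   # MapLVsByNode stand-in
            node_vol_should.setdefault(node, []).extend(lvs)
          break

    # node_vol_should -> {'nodeB': ['xenvg/inst1-disk0']}; these LVs will not
    # be flagged as orphan volumes on nodeB.
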