Small improvements for cluster verify
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 782cc2f..1e0ac3e 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -583,6 +583,30 @@ def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
   return inst_groups
 
 
+def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
+  """Checks if the instances in a node group are still correct.
+
+  @type cfg: L{config.ConfigWriter}
+  @param cfg: The cluster configuration
+  @type group_uuid: string
+  @param group_uuid: Node group UUID
+  @type owned_instances: set or frozenset
+  @param owned_instances: Set of currently owned instances
+
+  """
+  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
+  if owned_instances != wanted_instances:
+    raise errors.OpPrereqError("Instances in node group '%s' changed since"
+                               " locks were acquired, wanted '%s', have '%s';"
+                               " retry the operation" %
+                               (group_uuid,
+                                utils.CommaJoin(wanted_instances),
+                                utils.CommaJoin(owned_instances)),
+                               errors.ECODE_STATE)
+
+  return wanted_instances
+
+
 def _SupportsOob(cfg, node):
   """Tells if node supports OOB.
 
@@ -1486,6 +1510,47 @@ class _VerifyErrors(object):
       self.bad = self.bad or cond
 
 
+class LUClusterVerify(NoHooksLU):
+  """Submits all jobs necessary to verify the cluster.
+
+  """
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    self.needed_locks = {}
+
+  def Exec(self, feedback_fn):
+    jobs = []
+
+    if self.op.group_name:
+      groups = [self.op.group_name]
+      depends_fn = lambda: None
+    else:
+      groups = self.cfg.GetNodeGroupList()
+
+      # Verify global configuration
+      jobs.append([opcodes.OpClusterVerifyConfig()])
+
+      # Always depend on global verification
+      depends_fn = lambda: [(-len(jobs), [])]
+
+    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
+                                              depends=depends_fn())]
+                for group in groups)
+
+    # Fix up all parameters
+    for op in itertools.chain(*jobs): # pylint: disable-msg=W0142
+      op.debug_simulate_errors = self.op.debug_simulate_errors
+      op.verbose = self.op.verbose
+      op.error_codes = self.op.error_codes
+      try:
+        op.skip_checks = self.op.skip_checks
+      except AttributeError:
+        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
+
+    return ResultWithJobs(jobs)
+
+
 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
   """Verifies the cluster config.
 
@@ -1509,6 +1574,7 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
   def ExpandNames(self):
     # Information can be safely retrieved as the BGL is acquired in exclusive
     # mode
+    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
     self.all_node_info = self.cfg.GetAllNodesInfo()
     self.all_inst_info = self.cfg.GetAllInstancesInfo()
@@ -1570,7 +1636,7 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
                   "the following instances have a non-existing primary-node:"
                   " %s", utils.CommaJoin(no_node_instances))
 
-    return (not self.bad, [g.name for g in self.all_group_info.values()])
+    return not self.bad
 
 
 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
@@ -1668,7 +1734,10 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       self.needed_locks[locking.LEVEL_NODE] = nodes
 
   def CheckPrereq(self):
-    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
+    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
+    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
+
+    group_nodes = set(self.group_info.members)
     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
 
     unlocked_nodes = \
@@ -2500,6 +2569,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     """
     # This method has too many local variables. pylint: disable-msg=R0914
+    feedback_fn("* Verifying group '%s'" % self.group_info.name)
 
     if not self.my_node_names:
       # empty node group
@@ -2997,27 +3067,21 @@ class LUGroupVerifyDisks(NoHooksLU):
     assert self.group_uuid in owned_groups
 
     # Check if locked instances are still correct
-    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
-    if owned_instances != wanted_instances:
-      raise errors.OpPrereqError("Instances in node group %s changed since"
-                                 " locks were acquired, wanted %s, have %s;"
-                                 " retry the operation" %
-                                 (self.op.group_name,
-                                  utils.CommaJoin(wanted_instances),
-                                  utils.CommaJoin(owned_instances)),
-                                 errors.ECODE_STATE)
+    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
 
     # Get instance information
     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
 
     # Check if node groups for locked instances are still correct
     for (instance_name, inst) in self.instances.items():
-      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
-        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
       assert owned_nodes.issuperset(inst.all_nodes), \
         "Instance %s's nodes changed while we kept the lock" % instance_name
 
-      _CheckInstanceNodeGroups(self.cfg, instance_name, owned_groups)
+      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
+                                             owned_groups)
+
+      assert self.group_uuid in inst_groups, \
+        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
 
   def Exec(self, feedback_fn):
     """Verify integrity of cluster disks.
@@ -4279,7 +4343,7 @@ class _NodeQuery(_QueryBase):
 
   def ExpandNames(self, lu):
     lu.needed_locks = {}
-    lu.share_locks[locking.LEVEL_NODE] = 1
+    lu.share_locks = _ShareAll()
 
     if self.names:
       self.wanted = _GetWantedNodes(lu, self.names)
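
_ShareAll() replaces the single-level assignment so that every lock level, not just LEVEL_NODE, is acquired in shared mode; node queries never modify state, so exclusive locks only hurt concurrency. The helper's definition is outside this hunk; a sketch of what it plausibly looks like, assuming it follows the locking.LEVELS convention:

    def _ShareAll():
      """Returns a dict declaring all lock levels shared.

      """
      return dict.fromkeys(locking.LEVELS, 1)
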
@@ -4290,7 +4354,7 @@ class _NodeQuery(_QueryBase):
                        query.NQ_LIVE in self.requested_data)
 
     if self.do_locking:
-      # if we don't request only static fields, we need to lock the nodes
+      # If any non-static field is requested, we need to lock the nodes
       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
 
   def DeclareLocks(self, lu, level):
@@ -4662,7 +4726,7 @@ class LUQuery(NoHooksLU):
   def CheckArguments(self):
     qcls = _GetQueryImplementation(self.op.what)
 
-    self.impl = qcls(self.op.filter, self.op.fields, False)
+    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
 
   def ExpandNames(self):
     self.impl.ExpandNames(self)
@@ -8873,7 +8937,6 @@ class LUInstanceCreate(LogicalUnit):
       disk_abort = not _WaitForSync(self, iobj)
     elif iobj.disk_template in constants.DTS_INT_MIRROR:
       # make sure the disks are not degraded (still sync-ing is ok)
-      time.sleep(15)
       feedback_fn("* checking mirrors status")
       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
     else:
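
Dropping the unconditional time.sleep(15) removes a fixed 15-second delay from every creation of an internally mirrored instance; _WaitForSync(..., oneshot=True) already polls the mirror status itself, so the sleep bought nothing but latency. The general replacement pattern, sketched with an invented helper and check() predicate:

    import time

    def _WaitUntil(check, timeout=60.0, interval=1.0):
      """Polls check() instead of sleeping a fixed amount (illustrative).

      """
      deadline = time.time() + timeout
      while time.time() < deadline:
        if check():
          return True
        time.sleep(interval)
      return False
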
@@ -9251,6 +9314,9 @@ class TLReplaceDisks(Tasklet):
     return remote_node_name
 
   def _FindFaultyDisks(self, node_name):
+    """Wrapper for L{_FindFaultyInstanceDisks}.
+
+    """
     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                     node_name, True)
 
@@ -11996,6 +12062,9 @@ class LUGroupQuery(NoHooksLU):
   def ExpandNames(self):
     self.gq.ExpandNames(self)
 
+  def DeclareLocks(self, level):
+    self.gq.DeclareLocks(self, level)
+
   def Exec(self, feedback_fn):
     return self.gq.OldStyleQuery(self)
 
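
Without this forwarding, LUGroupQuery silently dropped any lock declarations its _GroupQuery helper wanted to make at deeper locking levels. Query LUs conventionally delegate all three phases to their _QueryBase implementation; a condensed sketch of the pattern (class and attribute names invented):

    class LUSomethingQuery(NoHooksLU):
      """Illustrative query LU delegating to a _QueryBase helper.

      """
      REQ_BGL = False

      def ExpandNames(self):
        self.xq.ExpandNames(self)

      def DeclareLocks(self, level):
        # Must be forwarded, otherwise per-level lock declarations
        # made by the query implementation are lost
        self.xq.DeclareLocks(self, level)

      def Exec(self, feedback_fn):
        return self.xq.OldStyleQuery(self)
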
@@ -12293,15 +12362,7 @@ class LUGroupEvacuate(LogicalUnit):
     assert self.group_uuid in owned_groups
 
     # Check if locked instances are still correct
-    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
-    if owned_instances != wanted_instances:
-      raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
-                                 " changed since locks were acquired, wanted"
-                                 " %s, have %s; retry the operation" %
-                                 (self.group_uuid,
-                                  utils.CommaJoin(wanted_instances),
-                                  utils.CommaJoin(owned_instances)),
-                                 errors.ECODE_STATE)
+    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
 
     # Get instance information
     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
@@ -12747,7 +12808,6 @@ class IAllocator(object):
     self.hypervisor = None
     self.relocate_from = None
     self.name = None
-    self.evac_nodes = None
     self.instances = None
     self.evac_mode = None
     self.target_groups = []
@@ -13029,15 +13089,6 @@ class IAllocator(object):
       }
     return request
 
-  def _AddEvacuateNodes(self):
-    """Add evacuate nodes data to allocator structure.
-
-    """
-    request = {
-      "evac_nodes": self.evac_nodes
-      }
-    return request
-
   def _AddNodeEvacuate(self):
     """Get data for node-evacuate requests.
 
@@ -13118,9 +13169,6 @@ class IAllocator(object):
       (_AddRelocateInstance,
        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
        ht.TList),
-    constants.IALLOCATOR_MODE_MEVAC:
-      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
-       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
      constants.IALLOCATOR_MODE_NODE_EVAC:
       (_AddNodeEvacuate, [
         ("instances", _STRING_LIST),
@@ -13179,39 +13227,25 @@ class IAllocator(object):
                                (self._result_check, self.result),
                                errors.ECODE_INVAL)
 
-    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
-                     constants.IALLOCATOR_MODE_MEVAC):
+    if self.mode == constants.IALLOCATOR_MODE_RELOC:
+      assert self.relocate_from is not None
+      assert self.required_nodes == 1
+
       node2group = dict((name, ndata["group"])
                         for (name, ndata) in self.in_data["nodes"].items())
 
       fn = compat.partial(self._NodesToGroups, node2group,
                           self.in_data["nodegroups"])
 
-      if self.mode == constants.IALLOCATOR_MODE_RELOC:
-        assert self.relocate_from is not None
-        assert self.required_nodes == 1
-
-        request_groups = fn(self.relocate_from)
-        result_groups = fn(rdict["result"])
-
-        if result_groups != request_groups:
-          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
-                                   " differ from original groups (%s)" %
-                                   (utils.CommaJoin(result_groups),
-                                    utils.CommaJoin(request_groups)))
-      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
-        request_groups = fn(self.evac_nodes)
-        for (instance_name, secnode) in self.result:
-          result_groups = fn([secnode])
-          if result_groups != request_groups:
-            raise errors.OpExecError("Iallocator returned new secondary node"
-                                     " '%s' (group '%s') for instance '%s'"
-                                     " which is not in original group '%s'" %
-                                     (secnode, utils.CommaJoin(result_groups),
-                                      instance_name,
-                                      utils.CommaJoin(request_groups)))
-      else:
-        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
+      instance = self.cfg.GetInstanceInfo(self.name)
+      request_groups = fn(self.relocate_from + [instance.primary_node])
+      result_groups = fn(rdict["result"] + [instance.primary_node])
+
+      if self.success and not set(result_groups).issubset(request_groups):
+        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
+                                 " differ from original groups (%s)" %
+                                 (utils.CommaJoin(result_groups),
+                                  utils.CommaJoin(request_groups)))
 
     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
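
The rewritten RELOC verification is simpler and slightly more permissive: instead of demanding that the groups of the returned nodes exactly equal the original groups, it appends the instance's primary node to both sides and requires only a subset relation, and it skips the check entirely when the allocator reported failure (self.success is false). Since relocation replaces only the secondary node, the primary's group legitimately appears on both sides. A worked example with invented group names:

    # Primary node in "rack1", old secondary also in "rack1"
    request_groups = ["rack1"]            # fn(relocate_from + [primary])

    # New secondary in "rack1" - accepted:
    assert set(["rack1"]).issubset(request_groups)

    # New secondary in "rack2" - rejected with OpExecError:
    assert not set(["rack1", "rack2"]).issubset(request_groups)
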
@@ -13295,10 +13329,6 @@ class LUTestAllocator(NoHooksLU):
       self.op.name = fname
       self.relocate_from = \
           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
-    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
-      if not hasattr(self.op, "evac_nodes"):
-        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
-                                   " opcode input", errors.ECODE_INVAL)
     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                           constants.IALLOCATOR_MODE_NODE_EVAC):
       if not self.op.instances:
@@ -13339,10 +13369,6 @@ class LUTestAllocator(NoHooksLU):
                        name=self.op.name,
                        relocate_from=list(self.relocate_from),
                        )
-    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
-      ial = IAllocator(self.cfg, self.rpc,
-                       mode=self.op.mode,
-                       evac_nodes=self.op.evac_nodes)
     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
       ial = IAllocator(self.cfg, self.rpc,
                        mode=self.op.mode,