Add function to compute the master candidates

author Iustin Pop <iustin@google.com>

Fri, 5 Dec 2008 11:14:19 +0000 (11:14 +0000)

committer Iustin Pop <iustin@google.com>

Fri, 5 Dec 2008 11:14:19 +0000 (11:14 +0000)
author Iustin Pop <iustin@google.com>
Fri, 5 Dec 2008 11:14:19 +0000 (11:14 +0000)
committer Iustin Pop <iustin@google.com>
Fri, 5 Dec 2008 11:14:19 +0000 (11:14 +0000)
diff --git a/lib/cmdlib.py b/lib/cmdlib.py

index f0521aa..e543ba9 100644 (file)
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -500,6 +500,22 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
    return _BuildInstanceHookEnv(**args)
  
  
+def _AdjustCandidatePool(lu):
+  """Adjust the candidate pool after node operations.
+
+  Asks the configuration to promote nodes to master candidates, logs
+  the names of the promoted nodes and calls C{ReaddNode} on the LU's
+  context for each of them. Afterwards, if there are more candidates
+  than the desired maximum, this is only logged; no node is demoted
+  here.
+
+  @param lu: the logical unit on whose behalf the pool is adjusted;
+      its C{cfg}, C{context} and C{LogInfo} attributes are used
+
+  """
+  # mod_list holds the names of the nodes that were just promoted
+  mod_list = lu.cfg.MaintainCandidatePool()
+  if mod_list:
+    lu.LogInfo("Promoted nodes to master candidate role: %s",
+               ", ".join(mod_list))
+    # NOTE(review): the inline code this helper replaces passed node
+    # objects to ReaddNode, whereas node names are passed here; confirm
+    # that ReaddNode accepts a name
+    for name in mod_list:
+      lu.context.ReaddNode(name)
+  # re-read the stats after the promotions to detect an oversized pool
+  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
+  if mc_now > mc_max:
+    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
+               (mc_now, mc_max))
+
+
  def _CheckInstanceBridgesExist(lu, instance):
    """Check that the brigdes needed by an instance exist.
  
@@ -1358,26 +1374,7 @@ class LUSetClusterParams(LogicalUnit):
      # we want to update nodes after the cluster so that if any errors
      # happen, we have recorded and saved the cluster info
      if self.op.candidate_pool_size is not None:
-      node_info = self.cfg.GetAllNodesInfo().values()
-      num_candidates = len([node for node in node_info
-                            if node.master_candidate])
-      num_nodes = len(node_info)
-      if num_candidates < self.op.candidate_pool_size:
-        random.shuffle(node_info)
-        for node in node_info:
-          if num_candidates >= self.op.candidate_pool_size:
-            break
-          if node.master_candidate:
-            continue
-          node.master_candidate = True
-          self.LogInfo("Promoting node %s to master candidate", node.name)
-          self.cfg.Update(node)
-          self.context.ReaddNode(node)
-          num_candidates += 1
-      elif num_candidates > self.op.candidate_pool_size:
-        self.LogInfo("Note: more nodes are candidates (%d) than the new value"
-                     " of candidate_pool_size (%d)" %
-                     (num_candidates, self.op.candidate_pool_size))
+      _AdjustCandidatePool(self)
  
  
  def _WaitForSync(lu, instance, oneshot=False, unlock=False):
@@ -1623,22 +1620,7 @@ class LURemoveNode(LogicalUnit):
      self.rpc.call_node_leave_cluster(node.name)
  
      # Promote nodes to master candidate as needed
-    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
-    node_info = self.cfg.GetAllNodesInfo().values()
-    num_candidates = len([n for n in node_info
-                          if n.master_candidate])
-    num_nodes = len(node_info)
-    random.shuffle(node_info)
-    for node in node_info:
-      if num_candidates >= cp_size or num_candidates >= num_nodes:
-        break
-      if node.master_candidate:
-        continue
-      node.master_candidate = True
-      self.LogInfo("Promoting node %s to master candidate", node.name)
-      self.cfg.Update(node)
-      self.context.ReaddNode(node)
-      num_candidates += 1
+    _AdjustCandidatePool(self)
  
  
  class LUQueryNodes(NoHooksLU):
@@ -1973,9 +1955,8 @@ class LUAddNode(LogicalUnit):
  
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      node_info = self.cfg.GetAllNodesInfo().values()
-    num_candidates = len([n for n in node_info
-                          if n.master_candidate])
-    master_candidate = num_candidates < cp_size
+    mc_now, _ = self.cfg.GetMasterCandidateStats()
+    master_candidate = mc_now < cp_size
  
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
diff --git a/lib/config.py b/lib/config.py

index 0d02f94..0263824 100644 (file)
--- a/lib/config.py
+++ b/lib/config.py
@@ -275,14 +275,10 @@ class ConfigWriter:
      if not data.nodes[data.cluster.master_node].master_candidate:
        result.append("Master node is not a master candidate")
  
-    cp_size = data.cluster.candidate_pool_size
-    num_c = 0
-    for node in data.nodes.values():
-      if node.master_candidate:
-        num_c += 1
-    if cp_size > num_c and num_c < len(data.nodes):
-      result.append("Not enough master candidates: actual %d, desired %d,"
-                    " %d total nodes" % (num_c, cp_size, len(data.nodes)))
+    mc_now, mc_max = self._UnlockedGetMasterCandidateStats()
+    if mc_now < mc_max:
+      result.append("Not enough master candidates: actual %d, target %d" %
+                    (mc_now, mc_max))
  
      return result
  
@@ -772,7 +768,7 @@ class ConfigWriter:
      """Get the configuration of all nodes.
  
      @rtype: dict
-    @returns: dict of (node, node_info), where node_info is what
+    @return: dict of (node, node_info), where node_info is what
                would GetNodeInfo return for the node
  
      """
@@ -780,6 +776,67 @@ class ConfigWriter:
                      for node in self._UnlockedGetNodeList()])
      return my_dict
  
+  def _UnlockedGetMasterCandidateStats(self):
+    """Get the current and the maximum achievable number of candidates.
+
+    The current count includes every node flagged as master candidate,
+    whether or not it is offline. The maximum is the number of
+    non-offline nodes, capped at the cluster's C{candidate_pool_size}.
+
+    No lock is taken in this method.
+
+    @rtype: tuple
+    @return: tuple of (current, max), where max is the smaller of the
+        number of non-offline nodes and the configured pool size
+
+    """
+    mc_now = mc_max = 0
+    for node in self._config_data.nodes.itervalues():
+      # mc_max first counts all nodes that are not offline
+      if not node.offline:
+        mc_max += 1
+      if node.master_candidate:
+        mc_now += 1
+    # the target can exceed neither the usable nodes nor the pool size
+    mc_max = min(mc_max, self._config_data.cluster.candidate_pool_size)
+    return (mc_now, mc_max)
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetMasterCandidateStats(self):
+    """Get the number of current and maximum possible candidates.
+
+    This is just a wrapper over L{_UnlockedGetMasterCandidateStats},
+    run under C{_config_lock} (requested with C{shared=1}).
+
+    @rtype: tuple
+    @return: tuple of (current, max); max is the number of non-offline
+        nodes, capped at the cluster's candidate pool size
+
+    """
+    return self._UnlockedGetMasterCandidateStats()
+
+  @locking.ssynchronized(_config_lock)
+  def MaintainCandidatePool(self):
+    """Try to grow the candidate pool to the desired size.
+
+    Nodes are visited in random order; nodes that are offline or that
+    are already master candidates are skipped. Every promoted node gets
+    its C{master_candidate} flag set and its serial number increased.
+    If at least one node was promoted, the cluster serial number is
+    increased as well and C{_WriteConfig} is called. The pool is never
+    shrunk by this method.
+
+    @rtype: list
+    @return: list with the adjusted node names
+
+    """
+    mc_now, mc_max = self._UnlockedGetMasterCandidateStats()
+    mod_list = []
+    if mc_now < mc_max:
+      node_list = self._config_data.nodes.keys()
+      # shuffle so the nodes are considered in random order
+      random.shuffle(node_list)
+      for name in node_list:
+        # stop as soon as the pool has reached its target size
+        if mc_now >= mc_max:
+          break
+        node = self._config_data.nodes[name]
+        if node.master_candidate or node.offline:
+          continue
+        mod_list.append(node.name)
+        node.master_candidate = True
+        node.serial_no += 1
+        mc_now += 1
+      if mc_now != mc_max:
+        # this should not happen: mc_max is capped at the number of
+        # non-offline nodes, so enough promotable nodes should exist
+        logging.warning("Warning: MaintainCandidatePool didn't manage to"
+                        " fill the candidate pool (%d/%d)", mc_now, mc_max)
+      # only touch the cluster serial and save if something changed
+      if mod_list:
+        self._config_data.cluster.serial_no += 1
+        self._WriteConfig()
+
+    return mod_list
+
    def _BumpSerialNo(self):
      """Bump up the serial number of the config.
author	Iustin Pop <iustin@google.com>
	Fri, 5 Dec 2008 11:14:19 +0000 (11:14 +0000)
committer	Iustin Pop <iustin@google.com>
	Fri, 5 Dec 2008 11:14:19 +0000 (11:14 +0000)
lib/cmdlib.py		patch \| blob \| history
lib/config.py		patch \| blob \| history