jqueue: Use priority for acquiring locks
diff --git a/lib/config.py b/lib/config.py
index 371a399..94dcf3c 100644
@@ -1,7 +1,7 @@
 #
 #
 
-# Copyright (C) 2006, 2007 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -31,6 +31,9 @@ much memory.
 
 """
 
+# pylint: disable-msg=R0904
+# R0904: Too many public methods
+
 import os
 import random
 import logging
@@ -44,9 +47,11 @@ from ganeti import rpc
 from ganeti import objects
 from ganeti import serializer
 from ganeti import uidpool
+from ganeti import netutils
+from ganeti import runtime
 
 
-_config_lock = locking.SharedLock()
+_config_lock = locking.SharedLock("ConfigWriter")
 
 # job id used for resource management at config upgrade time
 _UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
@@ -130,7 +135,7 @@ class ConfigWriter:
   @ivar _all_rms: a list of all temporary reservation managers
 
   """
-  def __init__(self, cfg_file=None, offline=False):
+  def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts):
     self.write_count = 0
     self._lock = _config_lock
     self._config_data = None
@@ -139,6 +144,7 @@ class ConfigWriter:
       self._cfg_file = constants.CLUSTER_CONF_FILE
     else:
       self._cfg_file = cfg_file
+    self._getents = _getents
     self._temporary_ids = TemporaryReservationManager()
     self._temporary_drbds = {}
     self._temporary_macs = TemporaryReservationManager()
@@ -150,7 +156,7 @@ class ConfigWriter:
     # _DistributeConfig, we compute it here once and reuse it; it's
     # better to raise an error before starting to modify the config
     # file than after it was modified
-    self._my_hostname = utils.HostInfo().name
+    self._my_hostname = netutils.Hostname.GetSysName()
     self._last_cluster_serial = -1
     self._OpenConfig()
 
@@ -455,6 +461,18 @@ class ConfigWriter:
                       (node.name, node.master_candidate, node.drain,
                        node.offline))
 
+    # nodegroups checks
+    nodegroups_names = set()
+    for nodegroup_uuid in data.nodegroups:
+      nodegroup = data.nodegroups[nodegroup_uuid]
+      if nodegroup.uuid != nodegroup_uuid:
+        result.append("nodegroup '%s' (uuid: '%s') indexed by wrong uuid '%s'"
+                      % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
+      if nodegroup.name in nodegroups_names:
+        result.append("duplicate nodegroup name '%s'" % nodegroup.name)
+      else:
+        nodegroups_names.add(nodegroup.name)
+
     # drbd minors check
     _, duplicates = self._UnlockedComputeDRBDMap()
     for node, minor, instance_a, instance_b in duplicates:
@@ -816,6 +834,46 @@ class ConfigWriter:
     """
     return self._config_data.cluster.rsahostkeypub
 
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetDefaultIAllocator(self):
+    """Get the default instance allocator for this cluster.
+
+    """
+    return self._config_data.cluster.default_iallocator
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetPrimaryIPFamily(self):
+    """Get cluster primary ip family.
+
+    @return: primary ip family
+
+    """
+    return self._config_data.cluster.primary_ip_family
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def LookupNodeGroup(self, target):
+    """Lookup a node group.
+
+    @type target: string or None
+    @param target: group name or uuid or None to look for the default
+    @rtype: string
+    @return: nodegroup uuid
+    @raises errors.OpPrereqError: when the target group cannot be found
+
+    """
+    if target is None:
+      if len(self._config_data.nodegroups) != 1:
+        raise errors.OpPrereqError("More than one nodegroup exists. Target"
+                                   " group must be specified explicitly.")
+      else:
+        return self._config_data.nodegroups.keys()[0]
+    if target in self._config_data.nodegroups:
+      return target
+    for nodegroup in self._config_data.nodegroups.values():
+      if nodegroup.name == target:
+        return nodegroup.uuid
+    raise errors.OpPrereqError("Nodegroup '%s' not found" % target)
+
   @locking.ssynchronized(_config_lock)
   def AddInstance(self, instance, ec_id):
     """Add an instance to the config.
@@ -1012,6 +1070,7 @@ class ConfigWriter:
 
     node.serial_no = 1
     node.ctime = node.mtime = time.time()
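+    # register the new node in its node group's member list as well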
+    self._UnlockedAddNodeToGroup(node.name, node.nodegroup)
     self._config_data.nodes[node.name] = node
     self._config_data.cluster.serial_no += 1
     self._WriteConfig()
@@ -1026,6 +1085,7 @@ class ConfigWriter:
     if node_name not in self._config_data.nodes:
       raise errors.ConfigurationError("Unknown node '%s'" % node_name)
 
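+    # drop the node from its node group's member list before deleting it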
+    self._UnlockedRemoveNodeFromGroup(self._config_data.nodes[node_name])
     del self._config_data.nodes[node_name]
     self._config_data.cluster.serial_no += 1
     self._WriteConfig()
@@ -1186,6 +1246,34 @@ class ConfigWriter:
 
     return mod_list
 
+  def _UnlockedAddNodeToGroup(self, node_name, nodegroup_uuid):
+    """Add a given node to the specified group.
+
+    """
+    if nodegroup_uuid not in self._config_data.nodegroups:
+      # This can happen if a node group gets deleted between its lookup and
+      # when we're adding the first node to it, since we don't keep a lock in
+      # the meantime. It's ok though, as we'll fail cleanly if the node group
+      # is not found anymore.
+      raise errors.OpExecError("Unknown nodegroup: %s" % nodegroup_uuid)
+    if node_name not in self._config_data.nodegroups[nodegroup_uuid].members:
+      self._config_data.nodegroups[nodegroup_uuid].members.append(node_name)
+
+  def _UnlockedRemoveNodeFromGroup(self, node):
+    """Remove a given node from its group.
+
+    """
+    nodegroup = node.nodegroup
+    if nodegroup not in self._config_data.nodegroups:
+      logging.warning("Warning: node '%s' has a non-existing nodegroup '%s'"
+                      " (while being removed from it)", node.name, nodegroup)
+    nodegroup_obj = self._config_data.nodegroups[nodegroup]
+    if node.name not in nodegroup_obj.members:
+      logging.warning("Warning: node '%s' not a member of its nodegroup '%s'"
+                      " (while being removed from it)", node.name, nodegroup)
+    else:
+      nodegroup_obj.members.remove(node.name)
+
   def _BumpSerialNo(self):
     """Bump up the serial number of the config.
 
@@ -1199,6 +1287,7 @@ class ConfigWriter:
     """
     return (self._config_data.instances.values() +
             self._config_data.nodes.values() +
+            self._config_data.nodegroups.values() +
             [self._config_data.cluster])
 
   def _OpenConfig(self):
@@ -1249,6 +1338,24 @@ class ConfigWriter:
       if item.uuid is None:
         item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
         modified = True
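+    # older configurations may have no node groups at all; create the
+    # initial "default" group for them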
+    if not self._config_data.nodegroups:
+      default_nodegroup_uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
+      default_nodegroup = objects.NodeGroup(
+          uuid=default_nodegroup_uuid,
+          name="default",
+          members=[],
+          )
+      self._config_data.nodegroups[default_nodegroup_uuid] = default_nodegroup
+      modified = True
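+    # assign any node without a group to the default group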
+    for node in self._config_data.nodes.values():
+      if not node.nodegroup:
+        node.nodegroup = self.LookupNodeGroup(None)
+        modified = True
+      # This is technically *not* an upgrade, but needs to be done both when
+      # nodegroups are being added, and upon normally loading the config,
+      # because the members list of a node group is discarded upon
+      # serializing/deserializing the object.
+      self._UnlockedAddNodeToGroup(node.name, node.nodegroup)
     if modified:
       self._WriteConfig()
       # This is ok even if it acquires the internal lock, as _UpgradeConfig is
@@ -1322,7 +1429,8 @@ class ConfigWriter:
     self._BumpSerialNo()
     txt = serializer.Dump(self._config_data.ToDict())
 
-    utils.WriteFile(destination, data=txt)
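+    # write the configuration group-readable (0640) for the confd group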
+    getents = self._getents()
+    utils.WriteFile(destination, data=txt, gid=getents.confd_gid, mode=0640)
 
     self.write_count += 1
 
@@ -1382,6 +1490,10 @@ class ConfigWriter:
 
     uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")
 
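+    # export node groups to ssconf as one "<uuid> <name>" line per group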
+    nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in
+                  self._config_data.nodegroups.values()]
+    nodegroups_data = fn(utils.NiceSort(nodegroups))
+
     return {
       constants.SS_CLUSTER_NAME: cluster.cluster_name,
       constants.SS_CLUSTER_TAGS: cluster_tags,
@@ -1396,14 +1508,23 @@ class ConfigWriter:
       constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
       constants.SS_OFFLINE_NODES: off_data,
       constants.SS_ONLINE_NODES: on_data,
+      constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
       constants.SS_INSTANCE_LIST: instance_data,
       constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
       constants.SS_HYPERVISOR_LIST: hypervisor_list,
       constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
       constants.SS_UID_POOL: uid_pool,
+      constants.SS_NODEGROUPS: nodegroups_data,
       }
 
   @locking.ssynchronized(_config_lock, shared=1)
+  def GetSsconfValues(self):
+    """Wrapper using lock around _UnlockedGetSsconf().
+
+    """
+    return self._UnlockedGetSsconfValues()
+
+  @locking.ssynchronized(_config_lock, shared=1)
   def GetVGName(self):
     """Return the volume group name.