Add a new NodeGroup config object
[ganeti-local] / lib / config.py
index ed83610..cda9441 100644 (file)
@@ -1,7 +1,7 @@
 #
 #
 
-# Copyright (C) 2006, 2007 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -31,6 +31,9 @@ much memory.
 
 """
 
+# pylint: disable-msg=R0904
+# R0904: Too many public methods
+
 import os
 import random
 import logging
@@ -43,12 +46,15 @@ from ganeti import constants
 from ganeti import rpc
 from ganeti import objects
 from ganeti import serializer
+from ganeti import uidpool
+from ganeti import netutils
+from ganeti import runtime
 
 
-_config_lock = locking.SharedLock()
+_config_lock = locking.SharedLock("ConfigWriter")
 
 # job id used for resource management at config upgrade time
-_UPGRADE_CONFIG_JID="jid-cfg-upgrade"
+_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
 
 
 def _ValidateConfig(data):
@@ -125,8 +131,11 @@ class TemporaryReservationManager:
 class ConfigWriter:
   """The interface to the cluster configuration.
 
+  @ivar _temporary_lvs: reservation manager for temporary LVs
+  @ivar _all_rms: a list of all temporary reservation managers
+
   """
-  def __init__(self, cfg_file=None, offline=False):
+  def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts):
     self.write_count = 0
     self._lock = _config_lock
     self._config_data = None
@@ -135,14 +144,19 @@ class ConfigWriter:
       self._cfg_file = constants.CLUSTER_CONF_FILE
     else:
       self._cfg_file = cfg_file
+    self._getents = _getents
     self._temporary_ids = TemporaryReservationManager()
     self._temporary_drbds = {}
     self._temporary_macs = TemporaryReservationManager()
+    self._temporary_secrets = TemporaryReservationManager()
+    self._temporary_lvs = TemporaryReservationManager()
+    self._all_rms = [self._temporary_ids, self._temporary_macs,
+                     self._temporary_secrets, self._temporary_lvs]
     # Note: in order to prevent errors when resolving our name in
     # _DistributeConfig, we compute it here once and reuse it; it's
     # better to raise an error before starting to modify the config
     # file than after it was modified
-    self._my_hostname = utils.HostInfo().name
+    self._my_hostname = netutils.Hostname.GetSysName()
     self._last_cluster_serial = -1
     self._OpenConfig()
 
@@ -190,22 +204,29 @@ class ConfigWriter:
       self._temporary_macs.Reserve(mac, ec_id)
 
   @locking.ssynchronized(_config_lock, shared=1)
-  def GenerateDRBDSecret(self):
+  def ReserveLV(self, lv_name, ec_id):
+    """Reserve an VG/LV pair for an instance.
+
+    @type lv_name: string
+    @param lv_name: the logical volume name to reserve
+
+    """
+    all_lvs = self._AllLVs()
+    if lv_name in all_lvs:
+      raise errors.ReservationError("LV already in use")
+    else:
+      self._temporary_lvs.Reserve(lv_name, ec_id)
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GenerateDRBDSecret(self, ec_id):
     """Generate a DRBD secret.
 
     This checks the current disks for duplicates.
 
     """
-    all_secrets = self._AllDRBDSecrets()
-    retries = 64
-    while retries > 0:
-      secret = utils.GenerateSecret()
-      if secret not in all_secrets:
-        break
-      retries -= 1
-    else:
-      raise errors.ConfigurationError("Can't generate unique DRBD secret")
-    return secret
+    return self._temporary_secrets.Generate(self._AllDRBDSecrets(),
+                                            utils.GenerateSecret,
+                                            ec_id)
 
   def _AllLVs(self):
     """Compute the list of all LVs.
@@ -349,6 +370,11 @@ class ConfigWriter:
     if invalid_hvs:
       result.append("enabled hypervisors contains invalid entries: %s" %
                     invalid_hvs)
+    missing_hvp = (set(data.cluster.enabled_hypervisors) -
+                   set(data.cluster.hvparams.keys()))
+    if missing_hvp:
+      result.append("hypervisor parameters missing for the enabled"
+                    " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
 
     if data.cluster.master_node not in data.nodes:
       result.append("cluster has invalid primary node '%s'" %
@@ -357,6 +383,9 @@ class ConfigWriter:
     # per-instance checks
     for instance_name in data.instances:
       instance = data.instances[instance_name]
+      if instance.name != instance_name:
+        result.append("instance '%s' is indexed by wrong name '%s'" %
+                      (instance.name, instance_name))
       if instance.primary_node not in data.nodes:
         result.append("instance '%s' has invalid primary node '%s'" %
                       (instance_name, instance.primary_node))
@@ -403,7 +432,7 @@ class ConfigWriter:
     for pnum in keys:
       pdata = ports[pnum]
       if len(pdata) > 1:
-        txt = ", ".join(["%s/%s" % val for val in pdata])
+        txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
         result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
 
     # highest used tcp port check
@@ -422,7 +451,10 @@ class ConfigWriter:
                     (mc_now, mc_max))
 
     # node checks
-    for node in data.nodes.values():
+    for node_name, node in data.nodes.items():
+      if node.name != node_name:
+        result.append("Node '%s' is indexed by wrong name '%s'" %
+                      (node.name, node_name))
       if [node.master_candidate, node.drained, node.offline].count(True) > 1:
         result.append("Node %s state is invalid: master_candidate=%s,"
                       " drain=%s, offline=%s" %
@@ -430,28 +462,49 @@ class ConfigWriter:
                        node.offline))
 
     # drbd minors check
-    d_map, duplicates = self._UnlockedComputeDRBDMap()
+    _, duplicates = self._UnlockedComputeDRBDMap()
     for node, minor, instance_a, instance_b in duplicates:
       result.append("DRBD minor %d on node %s is assigned twice to instances"
                     " %s and %s" % (minor, node, instance_a, instance_b))
 
     # IP checks
-    ips = { data.cluster.master_ip: ["cluster_ip"] }
-    def _helper(ip, name):
-      if ip in ips:
-        ips[ip].append(name)
-      else:
-        ips[ip] = [name]
+    default_nicparams = data.cluster.nicparams[constants.PP_DEFAULT]
+    ips = {}
+
+    def _AddIpAddress(ip, name):
+      ips.setdefault(ip, []).append(name)
+
+    _AddIpAddress(data.cluster.master_ip, "cluster_ip")
 
     for node in data.nodes.values():
-      _helper(node.primary_ip, "node:%s/primary" % node.name)
+      _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
       if node.secondary_ip != node.primary_ip:
-        _helper(node.secondary_ip, "node:%s/secondary" % node.name)
+        _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
+
+    for instance in data.instances.values():
+      for idx, nic in enumerate(instance.nics):
+        if nic.ip is None:
+          continue
+
+        nicparams = objects.FillDict(default_nicparams, nic.nicparams)
+        nic_mode = nicparams[constants.NIC_MODE]
+        nic_link = nicparams[constants.NIC_LINK]
+
+        if nic_mode == constants.NIC_MODE_BRIDGED:
+          link = "bridge:%s" % nic_link
+        elif nic_mode == constants.NIC_MODE_ROUTED:
+          link = "route:%s" % nic_link
+        else:
+          raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
+
+        _AddIpAddress("%s/%s" % (link, nic.ip),
+                      "instance:%s/nic:%d" % (instance.name, idx))
 
     for ip, owners in ips.items():
       if len(owners) > 1:
         result.append("IP address %s is used by multiple owners: %s" %
-                      (ip, ", ".join(owners)))
+                      (ip, utils.CommaJoin(owners)))
+
     return result
 
   @locking.ssynchronized(_config_lock, shared=1)
@@ -769,6 +822,22 @@ class ConfigWriter:
     """
     return self._config_data.cluster.rsahostkeypub
 
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetDefaultIAllocator(self):
+    """Get the default instance allocator for this cluster.
+
+    """
+    return self._config_data.cluster.default_iallocator
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetPrimaryIPFamily(self):
+    """Get cluster primary ip family.
+
+    @return: primary ip family
+
+    """
+    return self._config_data.cluster.primary_ip_family
+
   @locking.ssynchronized(_config_lock)
   def AddInstance(self, instance, ec_id):
     """Add an instance to the config.
@@ -811,9 +880,9 @@ class ConfigWriter:
     """
     if not item.uuid:
       item.uuid = self._GenerateUniqueID(ec_id)
-    elif item.uuid in self._AllIDs(temporary=True):
-      raise errors.ConfigurationError("Cannot add '%s': UUID already in use" %
-                                      (item.name, item.uuid))
+    elif item.uuid in self._AllIDs(include_temporary=True):
+      raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
+                                      " in use" % (item.name, item.uuid))
 
   def _SetInstanceStatus(self, instance_name, status):
     """Set the instance's status to a given value.
@@ -870,9 +939,9 @@ class ConfigWriter:
         # rename the file paths in logical and physical id
         file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
         disk.physical_id = disk.logical_id = (disk.logical_id[0],
-                                              os.path.join(file_storage_dir,
-                                                           inst.name,
-                                                           disk.iv_name))
+                                              utils.PathJoin(file_storage_dir,
+                                                             inst.name,
+                                                             disk.iv_name))
 
     self._config_data.instances[inst.name] = inst
     self._WriteConfig()
@@ -1009,7 +1078,6 @@ class ConfigWriter:
 
     return self._config_data.nodes[node_name]
 
-
   @locking.ssynchronized(_config_lock, shared=1)
   def GetNodeInfo(self, node_name):
     """Get the configuration of a node, as stored in the config.
@@ -1035,7 +1103,6 @@ class ConfigWriter:
     """
     return self._config_data.nodes.keys()
 
-
   @locking.ssynchronized(_config_lock, shared=1)
   def GetNodeList(self):
     """Return the list of nodes which are in the configuration.
@@ -1043,8 +1110,7 @@ class ConfigWriter:
     """
     return self._UnlockedGetNodeList()
 
-  @locking.ssynchronized(_config_lock, shared=1)
-  def GetOnlineNodeList(self):
+  def _UnlockedGetOnlineNodeList(self):
     """Return the list of nodes which are online.
 
     """
@@ -1053,6 +1119,13 @@ class ConfigWriter:
     return [node.name for node in all_nodes if not node.offline]
 
   @locking.ssynchronized(_config_lock, shared=1)
+  def GetOnlineNodeList(self):
+    """Return the list of nodes which are online.
+
+    """
+    return self._UnlockedGetOnlineNodeList()
+
+  @locking.ssynchronized(_config_lock, shared=1)
   def GetAllNodesInfo(self):
     """Get the configuration of all nodes.
 
@@ -1186,9 +1259,11 @@ class ConfigWriter:
     This is because some data elements need uniqueness across the
     whole configuration, etc.
 
-    @warning: this function will call L{_WriteConfig()}, so it needs
-        to either be called with the lock held or from a safe place
-        (the constructor)
+    @warning: this function will call L{_WriteConfig()}, but also
+        L{DropECReservations} so it needs to be called only from a
+        "safe" place (the constructor). If one wanted to call it with
+        the lock held, a DropECReservationUnlocked would need to be
+        created first, to avoid causing deadlock.
 
     """
     modified = False
@@ -1202,7 +1277,6 @@ class ConfigWriter:
       # only called at config init time, without the lock held
       self.DropECReservations(_UPGRADE_CONFIG_JID)
 
-
   def _DistributeConfig(self, feedback_fn):
     """Distribute the configuration to the other nodes.
 
@@ -1260,7 +1334,7 @@ class ConfigWriter:
     config_errors = self._UnlockedVerifyConfig()
     if config_errors:
       errmsg = ("Configuration data is not consistent: %s" %
-                (", ".join(config_errors)))
+                (utils.CommaJoin(config_errors)))
       logging.critical(errmsg)
       if feedback_fn:
         feedback_fn(errmsg)
@@ -1270,7 +1344,8 @@ class ConfigWriter:
     self._BumpSerialNo()
     txt = serializer.Dump(self._config_data.ToDict())
 
-    utils.WriteFile(destination, data=txt)
+    getents = self._getents()
+    utils.WriteFile(destination, data=txt, gid=getents.confd_gid, mode=0640)
 
     self.write_count += 1
 
@@ -1281,7 +1356,7 @@ class ConfigWriter:
     if self._last_cluster_serial < self._config_data.cluster.serial_no:
       if not self._offline:
         result = rpc.RpcRunner.call_write_ssconf_files(
-          self._UnlockedGetNodeList(),
+          self._UnlockedGetOnlineNodeList(),
           self._UnlockedGetSsconfValues())
 
         for nname, nresu in result.items():
@@ -1325,6 +1400,11 @@ class ConfigWriter:
 
     cluster = self._config_data.cluster
     cluster_tags = fn(cluster.GetTags())
+
+    hypervisor_list = fn(cluster.enabled_hypervisors)
+
+    uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")
+
     return {
       constants.SS_CLUSTER_NAME: cluster.cluster_name,
       constants.SS_CLUSTER_TAGS: cluster_tags,
@@ -1339,11 +1419,22 @@ class ConfigWriter:
       constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
       constants.SS_OFFLINE_NODES: off_data,
       constants.SS_ONLINE_NODES: on_data,
+      constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
       constants.SS_INSTANCE_LIST: instance_data,
       constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
+      constants.SS_HYPERVISOR_LIST: hypervisor_list,
+      constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
+      constants.SS_UID_POOL: uid_pool,
       }
 
   @locking.ssynchronized(_config_lock, shared=1)
+  def GetSsconfValues(self):
+    """Wrapper using lock around _UnlockedGetSsconf().
+
+    """
+    return self._UnlockedGetSsconfValues()
+
+  @locking.ssynchronized(_config_lock, shared=1)
   def GetVGName(self):
     """Return the volume group name.
 
@@ -1360,6 +1451,22 @@ class ConfigWriter:
     self._WriteConfig()
 
   @locking.ssynchronized(_config_lock, shared=1)
+  def GetDRBDHelper(self):
+    """Return DRBD usermode helper.
+
+    """
+    return self._config_data.cluster.drbd_usermode_helper
+
+  @locking.ssynchronized(_config_lock)
+  def SetDRBDHelper(self, drbd_helper):
+    """Set DRBD usermode helper.
+
+    """
+    self._config_data.cluster.drbd_usermode_helper = drbd_helper
+    self._config_data.cluster.serial_no += 1
+    self._WriteConfig()
+
+  @locking.ssynchronized(_config_lock, shared=1)
   def GetMACPrefix(self):
     """Return the mac prefix.
 
@@ -1376,6 +1483,13 @@ class ConfigWriter:
     """
     return self._config_data.cluster
 
+  @locking.ssynchronized(_config_lock, shared=1)
+  def HasAnyDiskOfType(self, dev_type):
+    """Check if in there is at disk of the given type in the configuration.
+
+    """
+    return self._config_data.HasAnyDiskOfType(dev_type)
+
   @locking.ssynchronized(_config_lock)
   def Update(self, target, feedback_fn):
     """Notify function to be called after updates.
@@ -1427,6 +1541,5 @@ class ConfigWriter:
     """Drop per-execution-context reservations
 
     """
-    self._temporary_ids.DropECReservations(ec_id)
-    self._temporary_macs.DropECReservations(ec_id)
-
+    for rm in self._all_rms:
+      rm.DropECReservations(ec_id)