Add uid_pool cluster parameter
[ganeti-local] / lib / config.py
index 3f5119f..353c288 100644 (file)
@@ -47,6 +47,9 @@ from ganeti import serializer
 
 _config_lock = locking.SharedLock()
 
+# job id used for resource management at config upgrade time
+_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
+
 
 def _ValidateConfig(data):
   """Verifies that a configuration objects looks valid.
@@ -122,6 +125,9 @@ class TemporaryReservationManager:
 class ConfigWriter:
   """The interface to the cluster configuration.
 
+  @ivar _temporary_lvs: reservation manager for temporary LVs
+  @ivar _all_rms: a list of all temporary reservation managers
+
   """
   def __init__(self, cfg_file=None, offline=False):
     self.write_count = 0
@@ -132,9 +138,13 @@ class ConfigWriter:
       self._cfg_file = constants.CLUSTER_CONF_FILE
     else:
       self._cfg_file = cfg_file
-    self._temporary_ids = set()
+    self._temporary_ids = TemporaryReservationManager()
     self._temporary_drbds = {}
-    self._temporary_macs = set()
+    self._temporary_macs = TemporaryReservationManager()
+    self._temporary_secrets = TemporaryReservationManager()
+    self._temporary_lvs = TemporaryReservationManager()
+    self._all_rms = [self._temporary_ids, self._temporary_macs,
+                     self._temporary_secrets, self._temporary_lvs]
     # Note: in order to prevent errors when resolving our name in
     # _DistributeConfig, we compute it here once and reuse it; it's
     # better to raise an error before starting to modify the config
@@ -151,57 +161,65 @@ class ConfigWriter:
     """
     return os.path.exists(constants.CLUSTER_CONF_FILE)
 
+  def _GenerateOneMAC(self):
+    """Generate one random MAC address below the cluster's MAC prefix.
+
+    @rtype: string
+    @return: the cluster MAC prefix followed by three random hex bytes
+
+    """
+    prefix = self._config_data.cluster.mac_prefix
+    octets = ["%02x" % random.randrange(0, 256) for _ in range(3)]
+    return "%s:%s" % (prefix, ":".join(octets))
+
   @locking.ssynchronized(_config_lock, shared=1)
-  def GenerateMAC(self):
+  def GenerateMAC(self, ec_id):
     """Generate a MAC for an instance.
 
     This should check the current instances for duplicates.
 
     """
-    prefix = self._config_data.cluster.mac_prefix
-    all_macs = self._AllMACs()
-    retries = 64
-    while retries > 0:
-      byte1 = random.randrange(0, 256)
-      byte2 = random.randrange(0, 256)
-      byte3 = random.randrange(0, 256)
-      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
-      if mac not in all_macs and mac not in self._temporary_macs:
-        break
-      retries -= 1
-    else:
-      raise errors.ConfigurationError("Can't generate unique MAC")
-    self._temporary_macs.add(mac)
-    return mac
+    existing = self._AllMACs()
+    return self._temporary_ids.Generate(existing, self._GenerateOneMAC, ec_id)
 
   @locking.ssynchronized(_config_lock, shared=1)
-  def IsMacInUse(self, mac):
-    """Predicate: check if the specified MAC is in use in the Ganeti cluster.
+  def ReserveMAC(self, mac, ec_id):
+    """Reserve a MAC address for an instance on behalf of ec_id.
 
     This only checks instances managed by this cluster, it does not
     check for potential collisions elsewhere.
 
     """
     all_macs = self._AllMACs()
-    return mac in all_macs or mac in self._temporary_macs
+    if mac in all_macs:
+      raise errors.ReservationError("mac already in use")
+    # not among the MACs returned by _AllMACs: reserve it under ec_id
+    self._temporary_macs.Reserve(mac, ec_id)
 
   @locking.ssynchronized(_config_lock, shared=1)
-  def GenerateDRBDSecret(self):
+  def ReserveLV(self, lv_name, ec_id):
+    """Reserve a VG/LV pair for an instance.
+
+    @type lv_name: string
+    @param lv_name: the logical volume name to reserve
+    @param ec_id: execution context id passed to the reservation manager
+
+    """
+    if lv_name in self._AllLVs():
+      raise errors.ReservationError("LV already in use")
+    # not among the LVs returned by _AllLVs: reserve it under ec_id
+    self._temporary_lvs.Reserve(lv_name, ec_id)
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GenerateDRBDSecret(self, ec_id):
     """Generate a DRBD secret.
 
     This checks the current disks for duplicates.
 
     """
-    all_secrets = self._AllDRBDSecrets()
-    retries = 64
-    while retries > 0:
-      secret = utils.GenerateSecret()
-      if secret not in all_secrets:
-        break
-      retries -= 1
-    else:
-      raise errors.ConfigurationError("Can't generate unique DRBD secret")
-    return secret
+    existing = self._AllDRBDSecrets()
+    return self._temporary_secrets.Generate(existing, utils.GenerateSecret,
+                                            ec_id)
 
   def _AllLVs(self):
     """Compute the list of all LVs.
@@ -225,14 +243,14 @@ class ConfigWriter:
     """
     existing = set()
     if include_temporary:
-      existing.update(self._temporary_ids)
+      existing.update(self._temporary_ids.GetReserved())
     existing.update(self._AllLVs())
     existing.update(self._config_data.instances.keys())
     existing.update(self._config_data.nodes.keys())
     existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
     return existing
 
-  def _GenerateUniqueID(self):
+  def _GenerateUniqueID(self, ec_id):
     """Generate an unique UUID.
 
     This checks the current node, instances and disk names for
@@ -242,33 +260,20 @@ class ConfigWriter:
     @return: the unique id
 
     """
-    existing = self._AllIDs(include_temporary=True)
-    retries = 64
-    while retries > 0:
-      unique_id = utils.NewUUID()
-      if unique_id not in existing and unique_id is not None:
-        break
-    else:
-      raise errors.ConfigurationError("Not able generate an unique ID"
-                                      " (last tried ID: %s" % unique_id)
-    self._temporary_ids.add(unique_id)
-    return unique_id
+    existing = self._AllIDs(include_temporary=False)
+    return self._temporary_ids.Generate(existing, utils.NewUUID, ec_id)
 
   @locking.ssynchronized(_config_lock, shared=1)
-  def GenerateUniqueID(self):
+  def GenerateUniqueID(self, ec_id):
     """Generate an unique ID.
 
     This is just a wrapper over the unlocked version.
 
-    """
-    return self._GenerateUniqueID()
-
-  def _CleanupTemporaryIDs(self):
-    """Cleanups the _temporary_ids structure.
+    @type ec_id: string
+    @param ec_id: unique id of the job the generated ID is reserved for
 
     """
-    existing = self._AllIDs(include_temporary=False)
-    self._temporary_ids = self._temporary_ids - existing
+    return self._GenerateUniqueID(ec_id)
 
   def _AllMACs(self):
     """Return all MACs present in the config.
@@ -366,6 +371,9 @@ class ConfigWriter:
     # per-instance checks
     for instance_name in data.instances:
       instance = data.instances[instance_name]
+      if instance.name != instance_name:
+        result.append("instance '%s' is indexed by wrong name '%s'" %
+                      (instance.name, instance_name))
       if instance.primary_node not in data.nodes:
         result.append("instance '%s' has invalid primary node '%s'" %
                       (instance_name, instance.primary_node))
@@ -412,7 +420,7 @@ class ConfigWriter:
     for pnum in keys:
       pdata = ports[pnum]
       if len(pdata) > 1:
-        txt = ", ".join(["%s/%s" % val for val in pdata])
+        txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
         result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
 
     # highest used tcp port check
@@ -431,7 +439,10 @@ class ConfigWriter:
                     (mc_now, mc_max))
 
     # node checks
-    for node in data.nodes.values():
+    for node_name, node in data.nodes.items():
+      if node.name != node_name:
+        result.append("Node '%s' is indexed by wrong name '%s'" %
+                      (node.name, node_name))
       if [node.master_candidate, node.drained, node.offline].count(True) > 1:
         result.append("Node %s state is invalid: master_candidate=%s,"
                       " drain=%s, offline=%s" %
@@ -439,28 +450,49 @@ class ConfigWriter:
                        node.offline))
 
     # drbd minors check
-    d_map, duplicates = self._UnlockedComputeDRBDMap()
+    _, duplicates = self._UnlockedComputeDRBDMap()
     for node, minor, instance_a, instance_b in duplicates:
       result.append("DRBD minor %d on node %s is assigned twice to instances"
                     " %s and %s" % (minor, node, instance_a, instance_b))
 
     # IP checks
-    ips = { data.cluster.master_ip: ["cluster_ip"] }
-    def _helper(ip, name):
-      if ip in ips:
-        ips[ip].append(name)
-      else:
-        ips[ip] = [name]
+    default_nicparams = data.cluster.nicparams[constants.PP_DEFAULT]
+    ips = {}
+
+    def _AddIpAddress(ip, name):
+      ips.setdefault(ip, []).append(name)
+
+    _AddIpAddress(data.cluster.master_ip, "cluster_ip")
 
     for node in data.nodes.values():
-      _helper(node.primary_ip, "node:%s/primary" % node.name)
+      _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
       if node.secondary_ip != node.primary_ip:
-        _helper(node.secondary_ip, "node:%s/secondary" % node.name)
+        _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
+
+    for instance in data.instances.values():
+      for idx, nic in enumerate(instance.nics):
+        if nic.ip is None:
+          continue
+
+        nicparams = objects.FillDict(default_nicparams, nic.nicparams)
+        nic_mode = nicparams[constants.NIC_MODE]
+        nic_link = nicparams[constants.NIC_LINK]
+
+        if nic_mode == constants.NIC_MODE_BRIDGED:
+          link = "bridge:%s" % nic_link
+        elif nic_mode == constants.NIC_MODE_ROUTED:
+          link = "route:%s" % nic_link
+        else:
+          raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
+
+        _AddIpAddress("%s/%s" % (link, nic.ip),
+                      "instance:%s/nic:%d" % (instance.name, idx))
 
     for ip, owners in ips.items():
       if len(owners) > 1:
         result.append("IP address %s is used by multiple owners: %s" %
-                      (ip, ", ".join(owners)))
+                      (ip, utils.CommaJoin(owners)))
+
     return result
 
   @locking.ssynchronized(_config_lock, shared=1)
@@ -809,8 +841,6 @@ class ConfigWriter:
     self._config_data.instances[instance.name] = instance
     self._config_data.cluster.serial_no += 1
     self._UnlockedReleaseDRBDMinors(instance.name)
-    for nic in instance.nics:
-      self._temporary_macs.discard(nic.mac)
     self._WriteConfig()
 
   def _EnsureUUID(self, item, ec_id):
@@ -821,10 +851,10 @@ class ConfigWriter:
 
     """
     if not item.uuid:
-      item.uuid = self._GenerateUniqueID()
-    elif item.uuid in self._AllIDs(temporary=True):
-      raise errors.ConfigurationError("Cannot add '%s': UUID already in use" %
-                                      (item.name, item.uuid))
+      item.uuid = self._GenerateUniqueID(ec_id)
+    elif item.uuid in self._AllIDs(include_temporary=True):
+      raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
+                                      " in use" % (item.name, item.uuid))
 
   def _SetInstanceStatus(self, instance_name, status):
     """Set the instance's status to a given value.
@@ -881,9 +911,9 @@ class ConfigWriter:
         # rename the file paths in logical and physical id
         file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
         disk.physical_id = disk.logical_id = (disk.logical_id[0],
-                                              os.path.join(file_storage_dir,
-                                                           inst.name,
-                                                           disk.iv_name))
+                                              utils.PathJoin(file_storage_dir,
+                                                             inst.name,
+                                                             disk.iv_name))
 
     self._config_data.instances[inst.name] = inst
     self._WriteConfig()
@@ -1020,7 +1050,6 @@ class ConfigWriter:
 
     return self._config_data.nodes[node_name]
 
-
   @locking.ssynchronized(_config_lock, shared=1)
   def GetNodeInfo(self, node_name):
     """Get the configuration of a node, as stored in the config.
@@ -1046,7 +1075,6 @@ class ConfigWriter:
     """
     return self._config_data.nodes.keys()
 
-
   @locking.ssynchronized(_config_lock, shared=1)
   def GetNodeList(self):
     """Return the list of nodes which are in the configuration.
@@ -1054,8 +1082,7 @@ class ConfigWriter:
     """
     return self._UnlockedGetNodeList()
 
-  @locking.ssynchronized(_config_lock, shared=1)
-  def GetOnlineNodeList(self):
+  def _UnlockedGetOnlineNodeList(self):
     """Return the list of nodes which are online.
 
     """
@@ -1064,6 +1091,13 @@ class ConfigWriter:
     return [node.name for node in all_nodes if not node.offline]
 
   @locking.ssynchronized(_config_lock, shared=1)
+  def GetOnlineNodeList(self):
+    """Return the list of online nodes, as computed by the unlocked version.
+
+    """
+    return self._UnlockedGetOnlineNodeList()
+
+  @locking.ssynchronized(_config_lock, shared=1)
   def GetAllNodesInfo(self):
     """Get the configuration of all nodes.
 
@@ -1205,10 +1239,13 @@ class ConfigWriter:
     modified = False
     for item in self._AllUUIDObjects():
       if item.uuid is None:
-        item.uuid = self._GenerateUniqueID()
+        item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
         modified = True
     if modified:
       self._WriteConfig()
+      # This is ok even if it acquires the internal lock, as _UpgradeConfig is
+      # only called at config init time, without the lock held
+      self.DropECReservations(_UPGRADE_CONFIG_JID)
 
   def _DistributeConfig(self, feedback_fn):
     """Distribute the configuration to the other nodes.
@@ -1260,11 +1297,6 @@ class ConfigWriter:
     """
     assert feedback_fn is None or callable(feedback_fn)
 
-    # First, cleanup the _temporary_ids set, if an ID is now in the
-    # other objects it should be discarded to prevent unbounded growth
-    # of that structure
-    self._CleanupTemporaryIDs()
-
     # Warn on config errors, but don't abort the save - the
     # configuration has already been modified, and we can't revert;
     # the best we can do is to warn the user and save as is, leaving
@@ -1272,7 +1304,7 @@ class ConfigWriter:
     config_errors = self._UnlockedVerifyConfig()
     if config_errors:
       errmsg = ("Configuration data is not consistent: %s" %
-                (", ".join(config_errors)))
+                (utils.CommaJoin(config_errors)))
       logging.critical(errmsg)
       if feedback_fn:
         feedback_fn(errmsg)
@@ -1293,7 +1325,7 @@ class ConfigWriter:
     if self._last_cluster_serial < self._config_data.cluster.serial_no:
       if not self._offline:
         result = rpc.RpcRunner.call_write_ssconf_files(
-          self._UnlockedGetNodeList(),
+          self._UnlockedGetOnlineNodeList(),
           self._UnlockedGetSsconfValues())
 
         for nname, nresu in result.items():
@@ -1337,6 +1369,9 @@ class ConfigWriter:
 
     cluster = self._config_data.cluster
     cluster_tags = fn(cluster.GetTags())
+
+    hypervisor_list = fn(cluster.enabled_hypervisors)
+
     return {
       constants.SS_CLUSTER_NAME: cluster.cluster_name,
       constants.SS_CLUSTER_TAGS: cluster_tags,
@@ -1353,6 +1388,8 @@ class ConfigWriter:
       constants.SS_ONLINE_NODES: on_data,
       constants.SS_INSTANCE_LIST: instance_data,
       constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
+      constants.SS_HYPERVISOR_LIST: hypervisor_list,
+      constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
       }
 
   @locking.ssynchronized(_config_lock, shared=1)
@@ -1431,8 +1468,6 @@ class ConfigWriter:
 
     if isinstance(target, objects.Instance):
       self._UnlockedReleaseDRBDMinors(target.name)
-      for nic in target.nics:
-        self._temporary_macs.discard(nic.mac)
 
     self._WriteConfig(feedback_fn=feedback_fn)
 
@@ -1441,5 +1476,5 @@ class ConfigWriter:
     """Drop per-execution-context reservations
 
     """
-    pass
-
+    for rm in self._all_rms:
+      rm.DropECReservations(ec_id)