LUInstanceRecreateDisks: Actually acquire node resource locks
[ganeti-local] / lib / config.py
index df4f84f..7654ba1 100644 (file)
@@ -31,13 +31,14 @@ much memory.
 
 """
 
-# pylint: disable-msg=R0904
+# pylint: disable=R0904
 # R0904: Too many public methods
 
 import os
 import random
 import logging
 import time
+import itertools
 
 from ganeti import errors
 from ganeti import locking
@@ -164,8 +165,21 @@ class ConfigWriter:
     self._my_hostname = netutils.Hostname.GetSysName()
     self._last_cluster_serial = -1
     self._cfg_id = None
+    self._context = None
     self._OpenConfig(accept_foreign)
 
+  def _GetRpc(self, address_list):
+    """Returns RPC runner for configuration.
+
+    """
+    return rpc.ConfigRunner(self._context, address_list)
+
+  def SetContext(self, context):
+    """Sets Ganeti context.
+
+    """
+    self._context = context
+
   # this method needs to be static, so that we can call it on the class
   @staticmethod
   def IsCluster():
@@ -374,7 +388,7 @@ class ConfigWriter:
         configuration errors
 
     """
-    # pylint: disable-msg=R0914
+    # pylint: disable=R0914
     result = []
     seen_macs = []
     ports = {}
@@ -412,6 +426,24 @@ class ConfigWriter:
       except errors.ConfigurationError, err:
         result.append("%s has invalid nicparams: %s" % (owner, err))
 
+    def _helper_ipolicy(owner, params):
+      try:
+        objects.InstancePolicy.CheckParameterSyntax(params)
+      except errors.ConfigurationError, err:
+        result.append("%s has invalid instance policy: %s" % (owner, err))
+
+    def _helper_ispecs(owner, params):
+      for key, value in params.items():
+        if key in constants.IPOLICY_PARAMETERS:
+          fullkey = "ipolicy/" + key
+          _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)
+        else:
+          # FIXME: assuming list type
+          if not isinstance(value, list):
+            result.append("%s has invalid instance policy: for %s,"
+                          " expecting list, got %s" %
+                          (owner, key, type(value)))
+
     # check cluster parameters
     _helper("cluster", "beparams", cluster.SimpleFillBE({}),
             constants.BES_PARAMETER_TYPES)
@@ -420,6 +452,8 @@ class ConfigWriter:
     _helper_nic("cluster", cluster.SimpleFillNIC({}))
     _helper("cluster", "ndparams", cluster.SimpleFillND({}),
             constants.NDS_PARAMETER_TYPES)
+    _helper_ipolicy("cluster", cluster.SimpleFillIPolicy({}))
+    _helper_ispecs("cluster", cluster.SimpleFillIPolicy({}))
 
     # per-instance checks
     for instance_name in data.instances:
@@ -534,8 +568,11 @@ class ConfigWriter:
         result.append("duplicate node group name '%s'" % nodegroup.name)
       else:
         nodegroups_names.add(nodegroup.name)
+      group_name = "group %s" % nodegroup.name
+      _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy))
+      _helper_ispecs(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy))
       if nodegroup.ndparams:
-        _helper("group %s" % nodegroup.name, "ndparams",
+        _helper(group_name, "ndparams",
                 cluster.SimpleFillND(nodegroup.ndparams),
                 constants.NDS_PARAMETER_TYPES)
 
@@ -877,6 +914,20 @@ class ConfigWriter:
     return self._config_data.cluster.master_netdev
 
   @locking.ssynchronized(_config_lock, shared=1)
+  def GetMasterNetmask(self):
+    """Get the netmask of the master node for this cluster.
+
+    """
+    return self._config_data.cluster.master_netmask
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetUseExternalMipScript(self):
+    """Get flag representing whether to use the external master IP setup script.
+
+    """
+    return self._config_data.cluster.use_external_mip_script
+
+  @locking.ssynchronized(_config_lock, shared=1)
   def GetFileStorageDir(self):
     """Get the file storage dir for this cluster.
 
@@ -923,6 +974,23 @@ class ConfigWriter:
     """
     return self._config_data.cluster.primary_ip_family
 
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetMasterNetworkParameters(self):
+    """Get network parameters of the master node.
+
+    @rtype: L{object.MasterNetworkParameters}
+    @return: network parameters of the master node
+
+    """
+    cluster = self._config_data.cluster
+    result = objects.MasterNetworkParameters(name=cluster.master_node,
+      ip=cluster.master_ip,
+      netmask=cluster.master_netmask,
+      netdev=cluster.master_netdev,
+      ip_family=cluster.primary_ip_family)
+
+    return result
+
   @locking.ssynchronized(_config_lock)
   def AddNodeGroup(self, group, ec_id, check_uuid=True):
     """Add a node group to the configuration.
@@ -1131,15 +1199,15 @@ class ConfigWriter:
     """Set the instance's status to a given value.
 
     """
-    assert isinstance(status, bool), \
+    assert status in constants.ADMINST_ALL, \
            "Invalid status '%s' passed to SetInstanceStatus" % (status,)
 
     if instance_name not in self._config_data.instances:
       raise errors.ConfigurationError("Unknown instance '%s'" %
                                       instance_name)
     instance = self._config_data.instances[instance_name]
-    if instance.admin_up != status:
-      instance.admin_up = status
+    if instance.admin_state != status:
+      instance.admin_state = status
       instance.serial_no += 1
       instance.mtime = time.time()
       self._WriteConfig()
@@ -1149,7 +1217,14 @@ class ConfigWriter:
     """Mark the instance status to up in the config.
 
     """
-    self._SetInstanceStatus(instance_name, True)
+    self._SetInstanceStatus(instance_name, constants.ADMINST_UP)
+
+  @locking.ssynchronized(_config_lock)
+  def MarkInstanceOffline(self, instance_name):
+    """Mark the instance status to down in the config.
+
+    """
+    self._SetInstanceStatus(instance_name, constants.ADMINST_OFFLINE)
 
   @locking.ssynchronized(_config_lock)
   def RemoveInstance(self, instance_name):
@@ -1158,6 +1233,14 @@ class ConfigWriter:
     """
     if instance_name not in self._config_data.instances:
       raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
+
+    # If a network port has been allocated to the instance,
+    # return it to the pool of free ports.
+    inst = self._config_data.instances[instance_name]
+    network_port = getattr(inst, "network_port", None)
+    if network_port is not None:
+      self._config_data.cluster.tcpudp_port_pool.add(network_port)
+
     del self._config_data.instances[instance_name]
     self._config_data.cluster.serial_no += 1
     self._WriteConfig()
@@ -1198,7 +1281,7 @@ class ConfigWriter:
     """Mark the status of an instance to down in the configuration.
 
     """
-    self._SetInstanceStatus(instance_name, False)
+    self._SetInstanceStatus(instance_name, constants.ADMINST_DOWN)
 
   def _UnlockedGetInstanceList(self):
     """Get the list of instances.
@@ -1297,6 +1380,22 @@ class ConfigWriter:
                     for instance in self._UnlockedGetInstanceList()])
     return my_dict
 
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetInstancesInfoByFilter(self, filter_fn):
+    """Get instance configuration with a filter.
+
+    @type filter_fn: callable
+    @param filter_fn: Filter function receiving instance object as parameter,
+      returning boolean. Important: this function is called while the
+      configuration locks is held. It must not do any complex work or call
+      functions potentially leading to a deadlock. Ideally it doesn't call any
+      other functions and just compares instance attributes.
+
+    """
+    return dict((name, inst)
+                for (name, inst) in self._config_data.instances.items()
+                if filter_fn(inst))
+
   @locking.ssynchronized(_config_lock)
   def AddNode(self, node, ec_id):
     """Add a node to the configuration.
@@ -1481,9 +1580,16 @@ class ConfigWriter:
               would GetNodeInfo return for the node
 
     """
-    my_dict = dict([(node, self._UnlockedGetNodeInfo(node))
-                    for node in self._UnlockedGetNodeList()])
-    return my_dict
+    return self._UnlockedGetAllNodesInfo()
+
+  def _UnlockedGetAllNodesInfo(self):
+    """Gets configuration of all nodes.
+
+    @note: See L{GetAllNodesInfo}
+
+    """
+    return dict([(node, self._UnlockedGetNodeInfo(node))
+                 for node in self._UnlockedGetNodeList()])
 
   @locking.ssynchronized(_config_lock, shared=1)
   def GetNodeGroupsFromNodes(self, nodes):
@@ -1594,6 +1700,79 @@ class ConfigWriter:
     else:
       nodegroup_obj.members.remove(node.name)
 
+  @locking.ssynchronized(_config_lock)
+  def AssignGroupNodes(self, mods):
+    """Changes the group of a number of nodes.
+
+    @type mods: list of tuples; (node name, new group UUID)
+    @param mods: Node membership modifications
+
+    """
+    groups = self._config_data.nodegroups
+    nodes = self._config_data.nodes
+
+    resmod = []
+
+    # Try to resolve names/UUIDs first
+    for (node_name, new_group_uuid) in mods:
+      try:
+        node = nodes[node_name]
+      except KeyError:
+        raise errors.ConfigurationError("Unable to find node '%s'" % node_name)
+
+      if node.group == new_group_uuid:
+        # Node is being assigned to its current group
+        logging.debug("Node '%s' was assigned to its current group (%s)",
+                      node_name, node.group)
+        continue
+
+      # Try to find current group of node
+      try:
+        old_group = groups[node.group]
+      except KeyError:
+        raise errors.ConfigurationError("Unable to find old group '%s'" %
+                                        node.group)
+
+      # Try to find new group for node
+      try:
+        new_group = groups[new_group_uuid]
+      except KeyError:
+        raise errors.ConfigurationError("Unable to find new group '%s'" %
+                                        new_group_uuid)
+
+      assert node.name in old_group.members, \
+        ("Inconsistent configuration: node '%s' not listed in members for its"
+         " old group '%s'" % (node.name, old_group.uuid))
+      assert node.name not in new_group.members, \
+        ("Inconsistent configuration: node '%s' already listed in members for"
+         " its new group '%s'" % (node.name, new_group.uuid))
+
+      resmod.append((node, old_group, new_group))
+
+    # Apply changes
+    for (node, old_group, new_group) in resmod:
+      assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
+        "Assigning to current group is not possible"
+
+      node.group = new_group.uuid
+
+      # Update members of involved groups
+      if node.name in old_group.members:
+        old_group.members.remove(node.name)
+      if node.name not in new_group.members:
+        new_group.members.append(node.name)
+
+    # Update timestamps and serials (only once per node/group object)
+    now = time.time()
+    for obj in frozenset(itertools.chain(*resmod)): # pylint: disable=W0142
+      obj.serial_no += 1
+      obj.mtime = now
+
+    # Force ssconf update
+    self._config_data.cluster.serial_no += 1
+
+    self._WriteConfig()
+
   def _BumpSerialNo(self):
     """Bump up the serial number of the config.
 
@@ -1716,8 +1895,9 @@ class ConfigWriter:
       node_list.append(node_info.name)
       addr_list.append(node_info.primary_ip)
 
-    result = rpc.RpcRunner.call_upload_file(node_list, self._cfg_file,
-                                            address_list=addr_list)
+    # TODO: Use dedicated resolver talking to config writer for name resolution
+    result = \
+      self._GetRpc(addr_list).call_upload_file(node_list, self._cfg_file)
     for to_node, to_result in result.items():
       msg = to_result.fail_msg
       if msg:
@@ -1776,7 +1956,7 @@ class ConfigWriter:
     # Write ssconf files on all nodes (including locally)
     if self._last_cluster_serial < self._config_data.cluster.serial_no:
       if not self._offline:
-        result = rpc.RpcRunner.call_write_ssconf_files(
+        result = self._GetRpc(None).call_write_ssconf_files(
           self._UnlockedGetOnlineNodeList(),
           self._UnlockedGetSsconfValues())
 
@@ -1839,6 +2019,7 @@ class ConfigWriter:
       constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
       constants.SS_MASTER_IP: cluster.master_ip,
       constants.SS_MASTER_NETDEV: cluster.master_netdev,
+      constants.SS_MASTER_NETMASK: str(cluster.master_netmask),
       constants.SS_MASTER_NODE: cluster.master_node,
       constants.SS_NODE_LIST: node_data,
       constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,