Don't pass sstore to LUs anymore

[ganeti-local] / lib / config.py
diff --git a/lib/config.py b/lib/config.py

index ef53cde..57c56d8 100644 (file)
--- a/lib/config.py
+++ b/lib/config.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#
  #
  
  # Copyright (C) 2006, 2007 Google Inc.
  #
  
  # Copyright (C) 2006, 2007 Google Inc.
@@ -21,46 +21,39 @@
  
  """Configuration management for Ganeti
  
  
  """Configuration management for Ganeti
  
-This module provides the interface to the ganeti cluster configuration.
-
+This module provides the interface to the Ganeti cluster configuration.
  
  
-The configuration data is stored on every node but is updated on the
-master only. After each update, the master distributes the data to the
-other nodes.
+The configuration data is stored on every node but is updated on the master
+only. After each update, the master distributes the data to the other nodes.
  
  
-Currently the data storage format is pickle as yaml was initially not
-available, then we used it but it was a memory-eating slow beast, so
-we reverted to pickle using custom Unpicklers.
+Currently, the data storage format is JSON. YAML was too slow and consumed
+too much memory.
  
  """
  
  import os
  
  """
  
  import os
-import socket
  import tempfile
  import random
  import tempfile
  import random
+import logging
  
  from ganeti import errors
  
  from ganeti import errors
-from ganeti import logger
+from ganeti import locking
  from ganeti import utils
  from ganeti import constants
  from ganeti import rpc
  from ganeti import objects
  from ganeti import utils
  from ganeti import constants
  from ganeti import rpc
  from ganeti import objects
+from ganeti import serializer
  
  
-def _my_uuidgen():
-  """Poor-man's uuidgen using the uuidgen binary.
  
  
-  """
-  result = utils.RunCmd(["uuidgen", "-r"])
-  if result.failed:
-    return None
-  return result.stdout.rstrip('\n')
+_config_lock = locking.SharedLock()
  
  
  
  
-try:
-  import uuid
-  _uuidgen = uuid.uuid4
-except ImportError:
-  _uuidgen = _my_uuidgen
+def _ValidateConfig(data):
+  """Check that a loaded configuration carries the supported version.
+
+  Compares data.version with constants.CONFIG_VERSION and raises
+  errors.ConfigurationError when they differ; returns None otherwise.
+
+  """
+  if data.version == constants.CONFIG_VERSION:
+    return
+  raise errors.ConfigurationError("Cluster configuration version"
+                                  " mismatch, got %s instead of %s" %
+                                  (data.version,
+                                   constants.CONFIG_VERSION))
  
  
  class ConfigWriter:
  
  
  class ConfigWriter:
@@ -68,6 +61,8 @@ class ConfigWriter:
  
    """
    def __init__(self, cfg_file=None, offline=False):
  
    """
    def __init__(self, cfg_file=None, offline=False):
+    self.write_count = 0
+    self._lock = _config_lock
      self._config_data = None
      self._config_time = None
      self._config_size = None
      self._config_data = None
      self._config_time = None
      self._config_size = None
@@ -78,6 +73,12 @@ class ConfigWriter:
      else:
        self._cfg_file = cfg_file
      self._temporary_ids = set()
      else:
        self._cfg_file = cfg_file
      self._temporary_ids = set()
+    self._temporary_drbds = {}
+    # Note: in order to prevent errors when resolving our name in
+    # _DistributeConfig, we compute it here once and reuse it; it's
+    # better to raise an error before starting to modify the config
+    # file than after it was modified
+    self._my_hostname = utils.HostInfo().name
  
    # this method needs to be static, so that we can call it on the class
    @staticmethod
  
    # this method needs to be static, so that we can call it on the class
    @staticmethod
@@ -87,6 +88,7 @@ class ConfigWriter:
      """
      return os.path.exists(constants.CLUSTER_CONF_FILE)
  
      """
      return os.path.exists(constants.CLUSTER_CONF_FILE)
  
+  @locking.ssynchronized(_config_lock, shared=1)
    def GenerateMAC(self):
      """Generate a MAC for an instance.
  
    def GenerateMAC(self):
      """Generate a MAC for an instance.
  
@@ -94,7 +96,6 @@ class ConfigWriter:
  
      """
      self._OpenConfig()
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
      prefix = self._config_data.cluster.mac_prefix
      all_macs = self._AllMACs()
      retries = 64
      prefix = self._config_data.cluster.mac_prefix
      all_macs = self._AllMACs()
      retries = 64
@@ -110,12 +111,42 @@ class ConfigWriter:
        raise errors.ConfigurationError("Can't generate unique MAC")
      return mac
  
        raise errors.ConfigurationError("Can't generate unique MAC")
      return mac
  
+  @locking.ssynchronized(_config_lock, shared=1)
+  def IsMacInUse(self, mac):
+    """Tell whether a MAC address is already present in the configuration.
+
+    Only the MACs reported by _AllMACs() are consulted, so collisions
+    outside of this cluster's configuration are not detected.
+
+    """
+    self._OpenConfig()
+    return mac in self._AllMACs()
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GenerateDRBDSecret(self):
+    """Return a DRBD secret that no configured disk uses yet.
+
+    Candidates come from utils.GenerateSecret() and are compared against
+    the secrets returned by _AllDRBDSecrets(). After 64 unsuccessful
+    attempts an errors.ConfigurationError is raised.
+
+    """
+    self._OpenConfig()
+    known_secrets = self._AllDRBDSecrets()
+    for _ in range(64):
+      candidate = utils.GenerateSecret()
+      if candidate not in known_secrets:
+        return candidate
+    raise errors.ConfigurationError("Can't generate unique DRBD secret")
+
    def _ComputeAllLVs(self):
      """Compute the list of all LVs.
  
      """
      self._OpenConfig()
    def _ComputeAllLVs(self):
      """Compute the list of all LVs.
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
      lvnames = set()
      for instance in self._config_data.instances.values():
        node_data = instance.MapLVsByNode()
      lvnames = set()
      for instance in self._config_data.instances.values():
        node_data = instance.MapLVsByNode()
@@ -123,6 +154,7 @@ class ConfigWriter:
          lvnames.update(lv_list)
      return lvnames
  
          lvnames.update(lv_list)
      return lvnames
  
+  @locking.ssynchronized(_config_lock, shared=1)
    def GenerateUniqueID(self, exceptions=None):
      """Generate an unique disk name.
  
    def GenerateUniqueID(self, exceptions=None):
      """Generate an unique disk name.
  
@@ -147,7 +179,7 @@ class ConfigWriter:
        existing.update(exceptions)
      retries = 64
      while retries > 0:
        existing.update(exceptions)
      retries = 64
      while retries > 0:
-      unique_id = _uuidgen()
+      unique_id = utils.NewUUID()
        if unique_id not in existing and unique_id is not None:
          break
      else:
        if unique_id not in existing and unique_id is not None:
          break
      else:
@@ -161,7 +193,6 @@ class ConfigWriter:
  
      """
      self._OpenConfig()
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
  
      result = []
      for instance in self._config_data.instances.values():
  
      result = []
      for instance in self._config_data.instances.values():
@@ -170,33 +201,90 @@ class ConfigWriter:
  
      return result
  
  
      return result
  
+  def _AllDRBDSecrets(self):
+    """Return all DRBD secrets present in the config.
+
+    Walks every disk of every instance, including child devices, and
+    collects the sixth element of the logical_id of each DRBD8 device
+    (the position _UnlockedSetDiskID unpacks as the secret).
+
+    Returns: a list of secrets, one per DRBD8 device found (duplicates
+    are not removed).
+
+    """
+    def helper(disk, result):
+      """Recursively gather secrets from this disk."""
+      # dev_type holds a logical-disk type, so compare against the LD_*
+      # constant like the other dev_type checks in this class do; the DT_*
+      # constants are used for instance.disk_template.
+      # NOTE(review): assumes DT_DRBD8 and LD_DRBD8 differ in value -
+      # confirm against constants.py.
+      if disk.dev_type == constants.LD_DRBD8:
+        result.append(disk.logical_id[5])
+      if disk.children:
+        for child in disk.children:
+          helper(child, result)
+
+    result = []
+    for instance in self._config_data.instances.values():
+      for disk in instance.disks:
+        helper(disk, result)
+
+    return result
+
+  @locking.ssynchronized(_config_lock, shared=1)
    def VerifyConfig(self):
      """Stub verify function.
      """
      self._OpenConfig()
    def VerifyConfig(self):
      """Stub verify function.
      """
      self._OpenConfig()
-    self._ReleaseLock()
  
      result = []
      seen_macs = []
  
      result = []
      seen_macs = []
+    ports = {}
      data = self._config_data
      for instance_name in data.instances:
        instance = data.instances[instance_name]
        if instance.primary_node not in data.nodes:
      data = self._config_data
      for instance_name in data.instances:
        instance = data.instances[instance_name]
        if instance.primary_node not in data.nodes:
-        result.append("Instance '%s' has invalid primary node '%s'" %
+        result.append("instance '%s' has invalid primary node '%s'" %
                        (instance_name, instance.primary_node))
        for snode in instance.secondary_nodes:
          if snode not in data.nodes:
                        (instance_name, instance.primary_node))
        for snode in instance.secondary_nodes:
          if snode not in data.nodes:
-          result.append("Instance '%s' has invalid secondary node '%s'" %
+          result.append("instance '%s' has invalid secondary node '%s'" %
                          (instance_name, snode))
        for idx, nic in enumerate(instance.nics):
          if nic.mac in seen_macs:
                          (instance_name, snode))
        for idx, nic in enumerate(instance.nics):
          if nic.mac in seen_macs:
-          result.append("Instance '%s' has NIC %d mac %s duplicate" %
+          result.append("instance '%s' has NIC %d mac %s duplicate" %
                          (instance_name, idx, nic.mac))
          else:
            seen_macs.append(nic.mac)
                          (instance_name, idx, nic.mac))
          else:
            seen_macs.append(nic.mac)
+
+      # gather the drbd ports for duplicate checks
+      for dsk in instance.disks:
+        if dsk.dev_type in constants.LDS_DRBD:
+          tcp_port = dsk.logical_id[2]
+          if tcp_port not in ports:
+            ports[tcp_port] = []
+          ports[tcp_port].append((instance.name, "drbd disk %s" % dsk.iv_name))
+      # gather network port reservation
+      net_port = getattr(instance, "network_port", None)
+      if net_port is not None:
+        if net_port not in ports:
+          ports[net_port] = []
+        ports[net_port].append((instance.name, "network port"))
+
+    # cluster-wide pool of free ports
+    for free_port in self._config_data.cluster.tcpudp_port_pool:
+      if free_port not in ports:
+        ports[free_port] = []
+      ports[free_port].append(("cluster", "port marked as free"))
+
+    # compute tcp/udp duplicate ports
+    keys = ports.keys()
+    keys.sort()
+    for pnum in keys:
+      pdata = ports[pnum]
+      if len(pdata) > 1:
+        txt = ", ".join(["%s/%s" % val for val in pdata])
+        result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
+
+    # highest used tcp port check
+    if keys:
+      if keys[-1] > self._config_data.cluster.highest_used_port:
+        result.append("Highest used port mismatch, saved %s, computed %s" %
+                      (self._config_data.cluster.highest_used_port,
+                       keys[-1]))
+
      return result
  
      return result
  
-  def SetDiskID(self, disk, node_name):
+  def _UnlockedSetDiskID(self, disk, node_name):
      """Convert the unique ID to the ID needed on the target nodes.
  
      This is used only for drbd, which needs ip/port configuration.
      """Convert the unique ID to the ID needed on the target nodes.
  
      This is used only for drbd, which needs ip/port configuration.
@@ -205,33 +293,49 @@ class ConfigWriter:
      this helps when the only the top device is passed to the remote
      node.
  
      this helps when the only the top device is passed to the remote
      node.
  
+    This function is for internal use, when the config lock is already held.
+
      """
      if disk.children:
        for child in disk.children:
      """
      if disk.children:
        for child in disk.children:
-        self.SetDiskID(child, node_name)
+        self._UnlockedSetDiskID(child, node_name)
  
      if disk.logical_id is None and disk.physical_id is not None:
        return
  
      if disk.logical_id is None and disk.physical_id is not None:
        return
-    if disk.dev_type == "drbd":
-      pnode, snode, port = disk.logical_id
+    if disk.dev_type == constants.LD_DRBD8:
+      pnode, snode, port, pminor, sminor, secret = disk.logical_id
        if node_name not in (pnode, snode):
          raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                          node_name)
        if node_name not in (pnode, snode):
          raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                          node_name)
-      pnode_info = self.GetNodeInfo(pnode)
-      snode_info = self.GetNodeInfo(snode)
+      pnode_info = self._UnlockedGetNodeInfo(pnode)
+      snode_info = self._UnlockedGetNodeInfo(snode)
        if pnode_info is None or snode_info is None:
          raise errors.ConfigurationError("Can't find primary or secondary node"
                                          " for %s" % str(disk))
        if pnode_info is None or snode_info is None:
          raise errors.ConfigurationError("Can't find primary or secondary node"
                                          " for %s" % str(disk))
+      p_data = (pnode_info.secondary_ip, port)
+      s_data = (snode_info.secondary_ip, port)
        if pnode == node_name:
        if pnode == node_name:
-        disk.physical_id = (pnode_info.secondary_ip, port,
-                            snode_info.secondary_ip, port)
+        disk.physical_id = p_data + s_data + (pminor, secret)
        else: # it must be secondary, we tested above
        else: # it must be secondary, we tested above
-        disk.physical_id = (snode_info.secondary_ip, port,
-                            pnode_info.secondary_ip, port)
+        disk.physical_id = s_data + p_data + (sminor, secret)
      else:
        disk.physical_id = disk.logical_id
      return
  
      else:
        disk.physical_id = disk.logical_id
      return
  
+  @locking.ssynchronized(_config_lock)
+  def SetDiskID(self, disk, node_name):
+    """Convert the unique ID to the ID needed on the target nodes.
+
+    Locked entry point: the work is delegated to _UnlockedSetDiskID(),
+    which also descends into the children of the disk, so passing only
+    the top device is enough.
+
+    """
+    result = self._UnlockedSetDiskID(disk, node_name)
+    return result
+
+  @locking.ssynchronized(_config_lock)
    def AddTcpUdpPort(self, port):
      """Adds a new port to the available port pool.
  
    def AddTcpUdpPort(self, port):
      """Adds a new port to the available port pool.
  
@@ -243,14 +347,15 @@ class ConfigWriter:
      self._config_data.cluster.tcpudp_port_pool.add(port)
      self._WriteConfig()
  
      self._config_data.cluster.tcpudp_port_pool.add(port)
      self._WriteConfig()
  
+  @locking.ssynchronized(_config_lock, shared=1)
    def GetPortList(self):
      """Returns a copy of the current port list.
  
      """
      self._OpenConfig()
    def GetPortList(self):
      """Returns a copy of the current port list.
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
      return self._config_data.cluster.tcpudp_port_pool.copy()
  
      return self._config_data.cluster.tcpudp_port_pool.copy()
  
+  @locking.ssynchronized(_config_lock)
    def AllocatePort(self):
      """Allocate a port.
  
    def AllocatePort(self):
      """Allocate a port.
  
@@ -275,6 +380,158 @@ class ConfigWriter:
      self._WriteConfig()
      return port
  
      self._WriteConfig()
      return port
  
+  def _ComputeDRBDMap(self, instance):
+    """Compute the used DRBD minor/nodes.
+
+    The map is seeded with one empty dict per node in the configuration,
+    then filled with the temporary reservations held in
+    self._temporary_drbds and finally with the minors found in the disks
+    of all configured instances.
+
+    The 'instance' argument is never read: the name is rebound by both
+    loops below before any use.
+
+    Return: dictionary of node_name: dict of minor: instance_name. The
+    returned dict will have all the nodes in it (even if with an empty
+    dict).
+
+    Raises errors.ProgrammerError if a minor found in a disk is already
+    recorded for the same node.
+
+    """
+    def _AppendUsedPorts(instance_name, disk, used):
+      """Record the minors of this disk and of its children in 'used'.
+
+      Despite the name, the values stored are the fourth and fifth
+      elements of the logical_id, i.e. the per-node minors.
+
+      """
+      # logical_ids shorter than five elements are skipped
+      if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
+        nodeA, nodeB, dummy, minorA, minorB = disk.logical_id[:5]
+        for node, port in ((nodeA, minorA), (nodeB, minorB)):
+          assert node in used, "Instance node not found in node list"
+          if port in used[node]:
+            raise errors.ProgrammerError("DRBD minor already used:"
+                                         " %s/%s, %s/%s" %
+                                         (node, port, instance_name,
+                                          used[node][port]))
+
+          used[node][port] = instance_name
+      if disk.children:
+        for child in disk.children:
+          _AppendUsedPorts(instance_name, child, used)
+
+    my_dict = dict((node, {}) for node in self._config_data.nodes)
+    # temporary reservations go in first, so that a configured disk using
+    # a reserved minor is reported by the helper above
+    for (node, minor), instance in self._temporary_drbds.iteritems():
+      my_dict[node][minor] = instance
+    for instance in self._config_data.instances.itervalues():
+      for disk in instance.disks:
+        _AppendUsedPorts(instance.name, disk, my_dict)
+    return my_dict
+
+  @locking.ssynchronized(_config_lock)
+  def AllocateDRBDMinor(self, nodes, instance):
+    """Reserve one free DRBD minor for each entry of 'nodes'.
+
+    The used minors are taken from _ComputeDRBDMap(). Each chosen minor
+    is recorded in self._temporary_drbds under (node, minor) with
+    'instance' as value, so a node listed several times gets several
+    distinct minors.
+
+    Returns: the list of minors, in the same order as the passed nodes.
+
+    """
+    self._OpenConfig()
+
+    used_by_node = self._ComputeDRBDMap(instance)
+    minors = []
+    for node_name in nodes:
+      node_minors = used_by_node[node_name]
+      if not node_minors:
+        # nothing in use on this node yet, so minor 0 is free
+        minors.append(0)
+        node_minors[0] = instance
+        self._temporary_drbds[(node_name, 0)] = instance
+        continue
+      in_use = sorted(node_minors.keys())
+      first_gap = utils.FirstFree(in_use)
+      if first_gap is not None:
+        chosen = first_gap
+      else:
+        # no gap reported, continue after the highest used minor
+        # TODO: implement high-limit check
+        chosen = in_use[-1] + 1
+      minors.append(chosen)
+      node_minors[chosen] = instance
+      assert (node_name, chosen) not in self._temporary_drbds, \
+             "Attempt to reuse reserved DRBD minor"
+      self._temporary_drbds[(node_name, chosen)] = instance
+    logging.debug("Request to allocate drbd minors, input: %s, returning %s",
+                  nodes, minors)
+    return minors
+
+  @locking.ssynchronized(_config_lock)
+  def ReleaseDRBDMinors(self, instance):
+    """Drop the temporary DRBD minor reservations of one instance.
+
+    Intended to be called on both the error and the success paths, once
+    the instance has been added or updated.
+
+    @type instance: string
+    @param instance: the instance whose entries are removed from
+                     self._temporary_drbds
+
+    """
+    reserved = [key for (key, owner) in self._temporary_drbds.items()
+                if owner == instance]
+    for key in reserved:
+      del self._temporary_drbds[key]
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetConfigVersion(self):
+    """Get the configuration version.
+
+    @return: Config version
+
+    """
+    # Load the configuration first, like every other getter: __init__
+    # leaves self._config_data set to None until _OpenConfig() has run.
+    self._OpenConfig()
+    return self._config_data.version
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetClusterName(self):
+    """Return the cluster_name stored in the cluster configuration.
+
+    @return: Cluster name
+
+    """
+    self._OpenConfig()
+    cluster = self._config_data.cluster
+    return cluster.cluster_name
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetMasterNode(self):
+    """Return the master_node stored in the cluster configuration.
+
+    @return: Master hostname
+
+    """
+    self._OpenConfig()
+    cluster = self._config_data.cluster
+    return cluster.master_node
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetMasterIP(self):
+    """Return the master_ip stored in the cluster configuration.
+
+    @return: Master IP
+
+    """
+    self._OpenConfig()
+    cluster = self._config_data.cluster
+    return cluster.master_ip
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetMasterNetdev(self):
+    """Return the master_netdev stored in the cluster configuration.
+
+    """
+    self._OpenConfig()
+    cluster = self._config_data.cluster
+    return cluster.master_netdev
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetFileStorageDir(self):
+    """Return the file_storage_dir stored in the cluster configuration.
+
+    """
+    self._OpenConfig()
+    cluster = self._config_data.cluster
+    return cluster.file_storage_dir
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetHypervisorType(self):
+    """Return the hypervisor stored in the cluster configuration.
+
+    """
+    self._OpenConfig()
+    cluster = self._config_data.cluster
+    return cluster.hypervisor
+
+  @locking.ssynchronized(_config_lock, shared=1)
    def GetHostKey(self):
      """Return the rsa hostkey from the config.
  
    def GetHostKey(self):
      """Return the rsa hostkey from the config.
  
@@ -283,9 +540,9 @@ class ConfigWriter:
      Returns: rsa hostkey
      """
      self._OpenConfig()
      Returns: rsa hostkey
      """
      self._OpenConfig()
-    self._ReleaseLock()
      return self._config_data.cluster.rsahostkeypub
  
      return self._config_data.cluster.rsahostkeypub
  
+  @locking.ssynchronized(_config_lock)
    def AddInstance(self, instance):
      """Add an instance to the config.
  
    def AddInstance(self, instance):
      """Add an instance to the config.
  
@@ -299,25 +556,41 @@ class ConfigWriter:
  
      if instance.disk_template != constants.DT_DISKLESS:
        all_lvs = instance.MapLVsByNode()
  
      if instance.disk_template != constants.DT_DISKLESS:
        all_lvs = instance.MapLVsByNode()
-      logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, all_lvs))
+      logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)
  
      self._OpenConfig()
  
      self._OpenConfig()
+    instance.serial_no = 1
      self._config_data.instances[instance.name] = instance
      self._config_data.instances[instance.name] = instance
+    self._config_data.cluster.serial_no += 1
      self._WriteConfig()
  
      self._WriteConfig()
  
-  def MarkInstanceUp(self, instance_name):
-    """Mark the instance status to up in the config.
+  def _SetInstanceStatus(self, instance_name, status):
+    """Set the instance's status to a given value.
  
      """
  
      """
+    if status not in ("up", "down"):
+      raise errors.ProgrammerError("Invalid status '%s' passed to"
+                                   " ConfigWriter._SetInstanceStatus()" %
+                                   status)
      self._OpenConfig()
  
      if instance_name not in self._config_data.instances:
        raise errors.ConfigurationError("Unknown instance '%s'" %
                                        instance_name)
      instance = self._config_data.instances[instance_name]
      self._OpenConfig()
  
      if instance_name not in self._config_data.instances:
        raise errors.ConfigurationError("Unknown instance '%s'" %
                                        instance_name)
      instance = self._config_data.instances[instance_name]
-    instance.status = "up"
-    self._WriteConfig()
+    if instance.status != status:
+      instance.status = status
+      instance.serial_no += 1
+      self._WriteConfig()
+
+  @locking.ssynchronized(_config_lock)
+  def MarkInstanceUp(self, instance_name):
+    """Mark the instance status to up in the config.
+
+    Thin locked wrapper that calls _SetInstanceStatus() with the status
+    "up".
+
+    """
+    self._SetInstanceStatus(instance_name, "up")
  
  
+  @locking.ssynchronized(_config_lock)
    def RemoveInstance(self, instance_name):
      """Remove the instance from the configuration.
  
    def RemoveInstance(self, instance_name):
      """Remove the instance from the configuration.
  
@@ -327,8 +600,10 @@ class ConfigWriter:
      if instance_name not in self._config_data.instances:
        raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
      del self._config_data.instances[instance_name]
      if instance_name not in self._config_data.instances:
        raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
      del self._config_data.instances[instance_name]
+    self._config_data.cluster.serial_no += 1
      self._WriteConfig()
  
      self._WriteConfig()
  
+  @locking.ssynchronized(_config_lock)
    def RenameInstance(self, old_name, new_name):
      """Rename an instance.
  
    def RenameInstance(self, old_name, new_name):
      """Rename an instance.
  
@@ -343,21 +618,37 @@ class ConfigWriter:
      inst = self._config_data.instances[old_name]
      del self._config_data.instances[old_name]
      inst.name = new_name
      inst = self._config_data.instances[old_name]
      del self._config_data.instances[old_name]
      inst.name = new_name
+
+    for disk in inst.disks:
+      if disk.dev_type == constants.LD_FILE:
+        # rename the file paths in logical and physical id
+        file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
+        disk.physical_id = disk.logical_id = (disk.logical_id[0],
+                                              os.path.join(file_storage_dir,
+                                                           inst.name,
+                                                           disk.iv_name))
+
      self._config_data.instances[inst.name] = inst
      self._config_data.instances[inst.name] = inst
+    self._config_data.cluster.serial_no += 1
      self._WriteConfig()
  
      self._WriteConfig()
  
+  @locking.ssynchronized(_config_lock)
    def MarkInstanceDown(self, instance_name):
      """Mark the status of an instance to down in the configuration.
  
      """
    def MarkInstanceDown(self, instance_name):
      """Mark the status of an instance to down in the configuration.
  
      """
-    self._OpenConfig()
+    self._SetInstanceStatus(instance_name, "down")
  
  
-    if instance_name not in self._config_data.instances:
-      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
-    instance = self._config_data.instances[instance_name]
-    instance.status = "down"
-    self._WriteConfig()
+  def _UnlockedGetInstanceList(self):
+    """Return the names of all instances in the configuration.
+
+    For internal use only, by callers that already hold the config lock.
+
+    """
+    self._OpenConfig()
+    instances = self._config_data.instances
+    return instances.keys()
  
  
+  @locking.ssynchronized(_config_lock, shared=1)
    def GetInstanceList(self):
      """Get the list of instances.
  
    def GetInstanceList(self):
      """Get the list of instances.
  
@@ -366,21 +657,32 @@ class ConfigWriter:
        these contains all the instances, also the ones in Admin_down state
  
      """
        these contains all the instances, also the ones in Admin_down state
  
      """
-    self._OpenConfig()
-    self._ReleaseLock()
-
-    return self._config_data.instances.keys()
+    return self._UnlockedGetInstanceList()
  
  
+  @locking.ssynchronized(_config_lock, shared=1)
    def ExpandInstanceName(self, short_name):
      """Attempt to expand an incomplete instance name.
  
      """
      self._OpenConfig()
    def ExpandInstanceName(self, short_name):
      """Attempt to expand an incomplete instance name.
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
  
      return utils.MatchNameComponent(short_name,
                                      self._config_data.instances.keys())
  
  
      return utils.MatchNameComponent(short_name,
                                      self._config_data.instances.keys())
  
+  def _UnlockedGetInstanceInfo(self, instance_name):
+    """Return the configuration object of one instance, or None.
+
+    None is returned when the name is not present in the configuration.
+    For internal use only, by callers that already hold the config lock.
+
+    """
+    self._OpenConfig()
+
+    instances = self._config_data.instances
+    if instance_name in instances:
+      return instances[instance_name]
+    return None
+
+  @locking.ssynchronized(_config_lock, shared=1)
    def GetInstanceInfo(self, instance_name):
      """Returns informations about an instance.
  
    def GetInstanceInfo(self, instance_name):
      """Returns informations about an instance.
  
@@ -394,14 +696,22 @@ class ConfigWriter:
        the instance object
  
      """
        the instance object
  
      """
-    self._OpenConfig()
-    self._ReleaseLock()
+    return self._UnlockedGetInstanceInfo(instance_name)
  
  
-    if instance_name not in self._config_data.instances:
-      return None
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetAllInstancesInfo(self):
+    """Get the configuration of all instances.
  
  
-    return self._config_data.instances[instance_name]
+    @rtype: dict
+    @returns: dict of (instance, instance_info), where instance_info is what
+              GetInstanceInfo would return for the instance
+
+    """
+    my_dict = dict([(instance, self._UnlockedGetInstanceInfo(instance))
+                    for instance in self._UnlockedGetInstanceList()])
+    return my_dict
  
  
+  @locking.ssynchronized(_config_lock)
    def AddNode(self, node):
      """Add a node to the configuration.
  
    def AddNode(self, node):
      """Add a node to the configuration.
  
@@ -409,67 +719,103 @@ class ConfigWriter:
        node: an object.Node instance
  
      """
        node: an object.Node instance
  
      """
+    logging.info("Adding node %s to configuration" % node.name)
+
      self._OpenConfig()
      self._OpenConfig()
+    node.serial_no = 1
      self._config_data.nodes[node.name] = node
      self._config_data.nodes[node.name] = node
+    self._config_data.cluster.serial_no += 1
      self._WriteConfig()
  
      self._WriteConfig()
  
+  @locking.ssynchronized(_config_lock)
    def RemoveNode(self, node_name):
      """Remove a node from the configuration.
  
      """
    def RemoveNode(self, node_name):
      """Remove a node from the configuration.
  
      """
+    logging.info("Removing node %s from configuration" % node_name)
+
      self._OpenConfig()
      if node_name not in self._config_data.nodes:
        raise errors.ConfigurationError("Unknown node '%s'" % node_name)
  
      del self._config_data.nodes[node_name]
      self._OpenConfig()
      if node_name not in self._config_data.nodes:
        raise errors.ConfigurationError("Unknown node '%s'" % node_name)
  
      del self._config_data.nodes[node_name]
+    self._config_data.cluster.serial_no += 1
      self._WriteConfig()
  
      self._WriteConfig()
  
+  @locking.ssynchronized(_config_lock, shared=1)
    def ExpandNodeName(self, short_name):
      """Attempt to expand an incomplete instance name.
  
      """
      self._OpenConfig()
    def ExpandNodeName(self, short_name):
      """Attempt to expand an incomplete instance name.
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
  
      return utils.MatchNameComponent(short_name,
                                      self._config_data.nodes.keys())
  
  
      return utils.MatchNameComponent(short_name,
                                      self._config_data.nodes.keys())
  
-  def GetNodeInfo(self, node_name):
+  def _UnlockedGetNodeInfo(self, node_name):
      """Get the configuration of a node, as stored in the config.
  
      """Get the configuration of a node, as stored in the config.
  
+    This function is for internal use, when the config lock is already held.
+
      Args: node: nodename (tuple) of the node
  
      Returns: the node object
  
      """
      self._OpenConfig()
      Args: node: nodename (tuple) of the node
  
      Returns: the node object
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
  
      if node_name not in self._config_data.nodes:
        return None
  
      return self._config_data.nodes[node_name]
  
  
      if node_name not in self._config_data.nodes:
        return None
  
      return self._config_data.nodes[node_name]
  
-  def GetNodeList(self):
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetNodeInfo(self, node_name):
+    """Get the configuration of a node, as stored in the config.
+
+    Locked entry point that delegates to _UnlockedGetNodeInfo().
+
+    Args: node_name: the name of the node to look up
+
+    Returns: whatever _UnlockedGetNodeInfo() returns for that name
+
+    """
+    node_info = self._UnlockedGetNodeInfo(node_name)
+    return node_info
+
+  def _UnlockedGetNodeList(self):
      """Return the list of nodes which are in the configuration.
  
      """Return the list of nodes which are in the configuration.
  
+    This function is for internal use, when the config lock is already held.
+
      """
      self._OpenConfig()
      """
      self._OpenConfig()
-    self._ReleaseLock()
      return self._config_data.nodes.keys()
  
      return self._config_data.nodes.keys()
  
-  def DumpConfig(self):
-    """Return the entire configuration of the cluster.
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetNodeList(self):
+    """Return the list of nodes which are in the configuration.
+
      """
      """
-    self._OpenConfig()
-    self._ReleaseLock()
-    return self._config_data
+    return self._UnlockedGetNodeList()
+
+  @locking.ssynchronized(_config_lock, shared=1)
+  def GetAllNodesInfo(self):
+    """Return the configuration of every node, keyed by node name.
+
+    @rtype: dict
+    @returns: dict of (node, node_info), where node_info is what
+              GetNodeInfo would return for the node
+
+    """
+    all_nodes = {}
+    for node in self._UnlockedGetNodeList():
+      all_nodes[node] = self._UnlockedGetNodeInfo(node)
+    return all_nodes
  
    def _BumpSerialNo(self):
      """Bump up the serial number of the config.
  
      """
  
    def _BumpSerialNo(self):
      """Bump up the serial number of the config.
  
      """
-    self._config_data.cluster.serial_no += 1
+    self._config_data.serial_no += 1
  
    def _OpenConfig(self):
      """Read the config data from disk.
  
    def _OpenConfig(self):
      """Read the config data from disk.
@@ -490,32 +836,28 @@ class ConfigWriter:
          self._config_inode == st.st_ino):
        # data is current, so skip loading of config file
        return
          self._config_inode == st.st_ino):
        # data is current, so skip loading of config file
        return
+
      f = open(self._cfg_file, 'r')
      try:
        try:
      f = open(self._cfg_file, 'r')
      try:
        try:
-        data = objects.ConfigObject.Load(f)
+        data = objects.ConfigData.FromDict(serializer.Load(f.read()))
        except Exception, err:
          raise errors.ConfigurationError(err)
      finally:
        f.close()
        except Exception, err:
          raise errors.ConfigurationError(err)
      finally:
        f.close()
+
+    # Make sure the configuration has the right version
+    _ValidateConfig(data)
+
      if (not hasattr(data, 'cluster') or
      if (not hasattr(data, 'cluster') or
-        not hasattr(data.cluster, 'config_version')):
+        not hasattr(data.cluster, 'rsahostkeypub')):
        raise errors.ConfigurationError("Incomplete configuration"
        raise errors.ConfigurationError("Incomplete configuration"
-                                      " (missing cluster.config_version)")
-    if data.cluster.config_version != constants.CONFIG_VERSION:
-      raise errors.ConfigurationError("Cluster configuration version"
-                                      " mismatch, got %s instead of %s" %
-                                      (data.cluster.config_version,
-                                       constants.CONFIG_VERSION))
+                                      " (missing cluster.rsahostkeypub)")
      self._config_data = data
      self._config_time = st.st_mtime
      self._config_size = st.st_size
      self._config_inode = st.st_ino
  
      self._config_data = data
      self._config_time = st.st_mtime
      self._config_size = st.st_size
      self._config_inode = st.st_ino
  
-  def _ReleaseLock(self):
-    """xxxx
-    """
-
    def _DistributeConfig(self):
      """Distribute the configuration to the other nodes.
  
    def _DistributeConfig(self):
      """Distribute the configuration to the other nodes.
  
@@ -526,21 +868,19 @@ class ConfigWriter:
      if self._offline:
        return True
      bad = False
      if self._offline:
        return True
      bad = False
-    nodelist = self.GetNodeList()
-    myhostname = socket.gethostname()
+    nodelist = self._UnlockedGetNodeList()
+    myhostname = self._my_hostname
  
  
-    tgt_list = []
-    for node in nodelist:
-      nodeinfo = self.GetNodeInfo(node)
-      if nodeinfo.name == myhostname:
-        continue
-      tgt_list.append(node)
+    try:
+      nodelist.remove(myhostname)
+    except ValueError:
+      pass
  
  
-    result = rpc.call_upload_file(tgt_list, self._cfg_file)
-    for node in tgt_list:
+    result = rpc.call_upload_file(nodelist, self._cfg_file)
+    for node in nodelist:
        if not result[node]:
        if not result[node]:
-        logger.Error("copy of file %s to node %s failed" %
-                     (self._cfg_file, node))
+        logging.error("copy of file %s to node %s failed",
+                      self._cfg_file, node)
          bad = True
      return not bad
  
          bad = True
      return not bad
  
@@ -551,16 +891,18 @@ class ConfigWriter:
      if destination is None:
        destination = self._cfg_file
      self._BumpSerialNo()
      if destination is None:
        destination = self._cfg_file
      self._BumpSerialNo()
+    txt = serializer.Dump(self._config_data.ToDict())
      dir_name, file_name = os.path.split(destination)
      fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
      f = os.fdopen(fd, 'w')
      try:
      dir_name, file_name = os.path.split(destination)
      fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
      f = os.fdopen(fd, 'w')
      try:
-      self._config_data.Dump(f)
+      f.write(txt)
        os.fsync(f.fileno())
      finally:
        f.close()
      # we don't need to do os.close(fd) as f.close() did it
      os.rename(name, destination)
        os.fsync(f.fileno())
      finally:
        f.close()
      # we don't need to do os.close(fd) as f.close() did it
      os.rename(name, destination)
+    self.write_count += 1
      # re-set our cache as not to re-read the config file
      try:
        st = os.stat(destination)
      # re-set our cache as not to re-read the config file
      try:
        st = os.stat(destination)
@@ -572,63 +914,67 @@ class ConfigWriter:
      # and redistribute the config file
      self._DistributeConfig()
  
      # and redistribute the config file
      self._DistributeConfig()
  
-  def InitConfig(self, node, primary_ip, secondary_ip,
-                 hostkeypub, mac_prefix, vg_name, def_bridge):
+  @locking.ssynchronized(_config_lock)
+  def InitConfig(self, version, cluster_config, master_node_config):
      """Create the initial cluster configuration.
  
      It will contain the current node, which will also be the master
      """Create the initial cluster configuration.
  
      It will contain the current node, which will also be the master
-    node, and no instances or operating systmes.
+    node, and no instances.
  
  
-    Args:
-      node: the nodename of the initial node
-      primary_ip: the IP address of the current host
-      secondary_ip: the secondary IP of the current host or None
-      hostkeypub: the public hostkey of this host
-
-    """
-    hu_port = constants.FIRST_DRBD_PORT - 1
-    globalconfig = objects.Cluster(config_version=constants.CONFIG_VERSION,
-                                   serial_no=1,
-                                   rsahostkeypub=hostkeypub,
-                                   highest_used_port=hu_port,
-                                   mac_prefix=mac_prefix,
-                                   volume_group_name=vg_name,
-                                   default_bridge=def_bridge,
-                                   tcpudp_port_pool=set())
-    if secondary_ip is None:
-      secondary_ip = primary_ip
-    nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
-                              secondary_ip=secondary_ip)
-
-    self._config_data = objects.ConfigData(nodes={node: nodeconfig},
+    @type version: int
+    @param version: Configuration version
+    @type cluster_config: objects.Cluster
+    @param cluster_config: Cluster configuration
+    @type master_node_config: objects.Node
+    @param master_node_config: Master node configuration
+
+    """
+    nodes = {
+      master_node_config.name: master_node_config,
+      }
+
+    self._config_data = objects.ConfigData(version=version,
+                                           cluster=cluster_config,
+                                           nodes=nodes,
                                             instances={},
                                             instances={},
-                                           cluster=globalconfig)
+                                           serial_no=1)
      self._WriteConfig()
  
      self._WriteConfig()
  
+  @locking.ssynchronized(_config_lock, shared=1)
    def GetVGName(self):
      """Return the volume group name.
  
      """
      self._OpenConfig()
    def GetVGName(self):
      """Return the volume group name.
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
      return self._config_data.cluster.volume_group_name
  
      return self._config_data.cluster.volume_group_name
  
+  @locking.ssynchronized(_config_lock)
+  def SetVGName(self, vg_name):
+    """Set the volume group name.
+
+    """
+    self._OpenConfig()
+    self._config_data.cluster.volume_group_name = vg_name
+    self._config_data.cluster.serial_no += 1
+    self._WriteConfig()
+
+  @locking.ssynchronized(_config_lock, shared=1)
    def GetDefBridge(self):
      """Return the default bridge.
  
      """
      self._OpenConfig()
    def GetDefBridge(self):
      """Return the default bridge.
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
      return self._config_data.cluster.default_bridge
  
      return self._config_data.cluster.default_bridge
  
+  @locking.ssynchronized(_config_lock, shared=1)
    def GetMACPrefix(self):
      """Return the mac prefix.
  
      """
      self._OpenConfig()
    def GetMACPrefix(self):
      """Return the mac prefix.
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
      return self._config_data.cluster.mac_prefix
  
      return self._config_data.cluster.mac_prefix
  
+  @locking.ssynchronized(_config_lock, shared=1)
    def GetClusterInfo(self):
      """Returns informations about the cluster
  
    def GetClusterInfo(self):
      """Returns informations about the cluster
  
@@ -637,10 +983,10 @@ class ConfigWriter:
  
      """
      self._OpenConfig()
  
      """
      self._OpenConfig()
-    self._ReleaseLock()
  
      return self._config_data.cluster
  
  
      return self._config_data.cluster
  
+  @locking.ssynchronized(_config_lock)
    def Update(self, target):
      """Notify function to be called after updates.
  
    def Update(self, target):
      """Notify function to be called after updates.
  
@@ -666,4 +1012,6 @@ class ConfigWriter:
      if not test:
        raise errors.ConfigurationError("Configuration updated since object"
                                        " has been read or unknown object")
      if not test:
        raise errors.ConfigurationError("Configuration updated since object"
                                        " has been read or unknown object")
+    target.serial_no += 1
+
      self._WriteConfig()
      self._WriteConfig()