-#!/usr/bin/python
+#
#
# Copyright (C) 2006, 2007 Google Inc.
"""Configuration management for Ganeti
-This module provides the interface to the ganeti cluster configuration.
-
+This module provides the interface to the Ganeti cluster configuration.
-The configuration data is stored on every node but is updated on the
-master only. After each update, the master distributes the data to the
-other nodes.
+The configuration data is stored on every node but is updated on the master
+only. After each update, the master distributes the data to the other nodes.
-Currently the data storage format is pickle as yaml was initially not
-available, then we used it but it was a memory-eating slow beast, so
-we reverted to pickle using custom Unpicklers.
+Currently, the data storage format is JSON. YAML was slow and consumed too
+much memory.
"""
import os
-import socket
import tempfile
import random
from ganeti import constants
from ganeti import rpc
from ganeti import objects
-
-def _my_uuidgen():
- """Poor-man's uuidgen using the uuidgen binary.
-
- """
- result = utils.RunCmd(["uuidgen", "-r"])
- if result.failed:
- return None
- return result.stdout.rstrip('\n')
-
-
-try:
- import uuid
- _uuidgen = uuid.uuid4
-except ImportError:
- _uuidgen = _my_uuidgen
+from ganeti import serializer
class ConfigWriter:
"""
def __init__(self, cfg_file=None, offline=False):
+ self.write_count = 0
self._config_data = None
self._config_time = None
self._config_size = None
else:
self._cfg_file = cfg_file
self._temporary_ids = set()
+ # Note: in order to prevent errors when resolving our name in
+ # _DistributeConfig, we compute it here once and reuse it; it's
+ # better to raise an error before starting to modify the config
+ # file than after it was modified
+ self._my_hostname = utils.HostInfo().name
# this method needs to be static, so that we can call it on the class
@staticmethod
raise errors.ConfigurationError("Can't generate unique MAC")
return mac
+ def IsMacInUse(self, mac):
+ """Predicate: check if the specified MAC is in use in the Ganeti cluster.
+
+    This only checks instances managed by this cluster; it does not
+    check for potential collisions elsewhere.
+
+ """
+ self._OpenConfig()
+ self._ReleaseLock()
+ all_macs = self._AllMACs()
+ return mac in all_macs
+
def _ComputeAllLVs(self):
"""Compute the list of all LVs.
existing.update(exceptions)
retries = 64
while retries > 0:
- unique_id = _uuidgen()
+ unique_id = utils.NewUUID()
if unique_id not in existing and unique_id is not None:
break
else:
result = []
seen_macs = []
+ ports = {}
data = self._config_data
for instance_name in data.instances:
instance = data.instances[instance_name]
if instance.primary_node not in data.nodes:
- result.append("Instance '%s' has invalid primary node '%s'" %
+ result.append("instance '%s' has invalid primary node '%s'" %
(instance_name, instance.primary_node))
for snode in instance.secondary_nodes:
if snode not in data.nodes:
- result.append("Instance '%s' has invalid secondary node '%s'" %
+ result.append("instance '%s' has invalid secondary node '%s'" %
(instance_name, snode))
for idx, nic in enumerate(instance.nics):
if nic.mac in seen_macs:
- result.append("Instance '%s' has NIC %d mac %s duplicate" %
+ result.append("instance '%s' has NIC %d mac %s duplicate" %
(instance_name, idx, nic.mac))
else:
seen_macs.append(nic.mac)
+
+ # gather the drbd ports for duplicate checks
+ for dsk in instance.disks:
+ if dsk.dev_type in constants.LDS_DRBD:
+ tcp_port = dsk.logical_id[2]
+ if tcp_port not in ports:
+ ports[tcp_port] = []
+ ports[tcp_port].append((instance.name, "drbd disk %s" % dsk.iv_name))
+ # gather network port reservation
+ net_port = getattr(instance, "network_port", None)
+ if net_port is not None:
+ if net_port not in ports:
+ ports[net_port] = []
+ ports[net_port].append((instance.name, "network port"))
+
+ # cluster-wide pool of free ports
+ for free_port in self._config_data.cluster.tcpudp_port_pool:
+ if free_port not in ports:
+ ports[free_port] = []
+ ports[free_port].append(("cluster", "port marked as free"))
+
+ # compute tcp/udp duplicate ports
+ keys = ports.keys()
+ keys.sort()
+ for pnum in keys:
+ pdata = ports[pnum]
+ if len(pdata) > 1:
+ txt = ", ".join(["%s/%s" % val for val in pdata])
+ result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
+
+ # highest used tcp port check
+ if keys:
+ if keys[-1] > self._config_data.cluster.highest_used_port:
+ result.append("Highest used port mismatch, saved %s, computed %s" %
+ (self._config_data.cluster.highest_used_port,
+ keys[-1]))
+
return result
def SetDiskID(self, disk, node_name):
if disk.logical_id is None and disk.physical_id is not None:
return
- if disk.dev_type == "drbd":
+ if disk.dev_type in constants.LDS_DRBD:
pnode, snode, port = disk.logical_id
if node_name not in (pnode, snode):
raise errors.ConfigurationError("DRBD device not knowing node %s" %
self._config_data.instances[instance.name] = instance
self._WriteConfig()
- def MarkInstanceUp(self, instance_name):
- """Mark the instance status to up in the config.
+ def _SetInstanceStatus(self, instance_name, status):
+ """Set the instance's status to a given value.
"""
+ if status not in ("up", "down"):
+ raise errors.ProgrammerError("Invalid status '%s' passed to"
+ " ConfigWriter._SetInstanceStatus()" %
+ status)
self._OpenConfig()
if instance_name not in self._config_data.instances:
raise errors.ConfigurationError("Unknown instance '%s'" %
instance_name)
instance = self._config_data.instances[instance_name]
- instance.status = "up"
- self._WriteConfig()
+ if instance.status != status:
+ instance.status = status
+ self._WriteConfig()
+
+ def MarkInstanceUp(self, instance_name):
+ """Mark the instance status to up in the config.
+
+ """
+ self._SetInstanceStatus(instance_name, "up")
def RemoveInstance(self, instance_name):
"""Remove the instance from the configuration.
del self._config_data.instances[instance_name]
self._WriteConfig()
- def MarkInstanceDown(self, instance_name):
- """Mark the status of an instance to down in the configuration.
+ def RenameInstance(self, old_name, new_name):
+ """Rename an instance.
+
+    This needs to be done in ConfigWriter and not by RemoveInstance
+    combined with AddInstance, as only this way can we guarantee an
+    atomic rename.
"""
self._OpenConfig()
-
- if instance_name not in self._config_data.instances:
- raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
- instance = self._config_data.instances[instance_name]
- instance.status = "down"
+ if old_name not in self._config_data.instances:
+ raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
+ inst = self._config_data.instances[old_name]
+ del self._config_data.instances[old_name]
+ inst.name = new_name
+ self._config_data.instances[inst.name] = inst
self._WriteConfig()
+ def MarkInstanceDown(self, instance_name):
+ """Mark the status of an instance to down in the configuration.
+
+ """
+ self._SetInstanceStatus(instance_name, "down")
+
def GetInstanceList(self):
"""Get the list of instances.
f = open(self._cfg_file, 'r')
try:
try:
- data = objects.ConfigObject.Load(f)
+ data = objects.ConfigData.FromDict(serializer.Load(f.read()))
except Exception, err:
raise errors.ConfigurationError(err)
finally:
return True
bad = False
nodelist = self.GetNodeList()
- myhostname = socket.gethostname()
+ myhostname = self._my_hostname
- tgt_list = []
- for node in nodelist:
- nodeinfo = self.GetNodeInfo(node)
- if nodeinfo.name == myhostname:
- continue
- tgt_list.append(node)
+ try:
+ nodelist.remove(myhostname)
+ except ValueError:
+ pass
- result = rpc.call_upload_file(tgt_list, self._cfg_file)
- for node in tgt_list:
+ result = rpc.call_upload_file(nodelist, self._cfg_file)
+ for node in nodelist:
if not result[node]:
logger.Error("copy of file %s to node %s failed" %
(self._cfg_file, node))
if destination is None:
destination = self._cfg_file
self._BumpSerialNo()
+ txt = serializer.Dump(self._config_data.ToDict())
dir_name, file_name = os.path.split(destination)
fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
f = os.fdopen(fd, 'w')
try:
- self._config_data.Dump(f)
+ f.write(txt)
os.fsync(f.fileno())
finally:
f.close()
# we don't need to do os.close(fd) as f.close() did it
os.rename(name, destination)
+ self.write_count += 1
# re-set our cache as not to re-read the config file
try:
st = os.stat(destination)