import tempfile
import random
import logging
+import time
from ganeti import errors
from ganeti import locking
"""
result = []
- if disk.logical_id in l_ids:
- result.append("duplicate logical id %s" % str(disk.logical_id))
- else:
- l_ids.append(disk.logical_id)
- if disk.physical_id in p_ids:
- result.append("duplicate physical id %s" % str(disk.physical_id))
- else:
- p_ids.append(disk.physical_id)
+ if disk.logical_id is not None:
+ if disk.logical_id in l_ids:
+ result.append("duplicate logical id %s" % str(disk.logical_id))
+ else:
+ l_ids.append(disk.logical_id)
+ if disk.physical_id is not None:
+ if disk.physical_id in p_ids:
+ result.append("duplicate physical id %s" % str(disk.physical_id))
+ else:
+ p_ids.append(disk.physical_id)
if disk.children:
for child in disk.children:
data = self._config_data
seen_lids = []
seen_pids = []
+
+ # global cluster checks
+ if not data.cluster.enabled_hypervisors:
+ result.append("enabled hypervisors list doesn't have any entries")
+ invalid_hvs = set(data.cluster.enabled_hypervisors) - constants.HYPER_TYPES
+ if invalid_hvs:
+ result.append("enabled hypervisors contains invalid entries: %s" %
+ invalid_hvs)
+
+ if data.cluster.master_node not in data.nodes:
+ result.append("cluster has invalid primary node '%s'" %
+ data.cluster.master_node)
+
+ # per-instance checks
for instance_name in data.instances:
instance = data.instances[instance_name]
if instance.primary_node not in data.nodes:
def _AppendUsedPorts(instance_name, disk, used):
duplicates = []
if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
- nodeA, nodeB, dummy, minorA, minorB = disk.logical_id[:5]
- for node, port in ((nodeA, minorA), (nodeB, minorB)):
+ node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
+ for node, port in ((node_a, minor_a), (node_b, minor_b)):
assert node in used, ("Node '%s' of instance '%s' not found"
" in node list" % (node, instance_name))
if port in used[node]:
"""Get the hypervisor type for this cluster.
"""
- return self._config_data.cluster.default_hypervisor
+ return self._config_data.cluster.enabled_hypervisors[0]
@locking.ssynchronized(_config_lock, shared=1)
def GetHostKey(self):
all_lvs = instance.MapLVsByNode()
logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)
+ all_macs = self._AllMACs()
+ for nic in instance.nics:
+ if nic.mac in all_macs:
+ raise errors.ConfigurationError("Cannot add instance %s:"
+ " MAC address '%s' already in use." % (instance.name, nic.mac))
+
instance.serial_no = 1
+ instance.ctime = instance.mtime = time.time()
self._config_data.instances[instance.name] = instance
self._config_data.cluster.serial_no += 1
self._UnlockedReleaseDRBDMinors(instance.name)
if instance.admin_up != status:
instance.admin_up = status
instance.serial_no += 1
+ instance.mtime = time.time()
self._WriteConfig()
@locking.ssynchronized(_config_lock)
self._config_data.instances.keys())
def _UnlockedGetInstanceInfo(self, instance_name):
- """Returns informations about an instance.
+ """Returns information about an instance.
This function is for internal use, when the config lock is already held.
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceInfo(self, instance_name):
- """Returns informations about an instance.
+ """Returns information about an instance.
- It takes the information from the configuration file. Other informations of
+ It takes the information from the configuration file. Other details of
an instance are taken from the live systems.
@param instance_name: name of the instance, e.g.
"""Get the configuration of all instances.
@rtype: dict
- @returns: dict of (instance, instance_info), where instance_info is what
+ @return: dict of (instance, instance_info), where instance_info is what
would GetInstanceInfo return for the node
"""
logging.info("Adding node %s to configuration" % node.name)
node.serial_no = 1
+ node.ctime = node.mtime = time.time()
self._config_data.nodes[node.name] = node
self._config_data.cluster.serial_no += 1
self._WriteConfig()
for node in self._UnlockedGetNodeList()])
return my_dict
- def _UnlockedGetMasterCandidateStats(self):
+ def _UnlockedGetMasterCandidateStats(self, exceptions=None):
"""Get the number of current and maximum desired and possible candidates.
+ @type exceptions: list
+ @param exceptions: if passed, list of node names that should be ignored
@rtype: tuple
@return: tuple of (current, desired and possible)
"""
mc_now = mc_max = 0
- for node in self._config_data.nodes.itervalues():
+ for node in self._config_data.nodes.values():
+ if exceptions and node.name in exceptions:
+ continue
if not (node.offline or node.drained):
mc_max += 1
if node.master_candidate:
return (mc_now, mc_max)
@locking.ssynchronized(_config_lock, shared=1)
- def GetMasterCandidateStats(self):
+ def GetMasterCandidateStats(self, exceptions=None):
"""Get the number of current and maximum possible candidates.
This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.
+ @type exceptions: list
+ @param exceptions: if passed, list of node names that should be ignored
@rtype: tuple
@return: tuple of (current, max)
"""
- return self._UnlockedGetMasterCandidateStats()
+ return self._UnlockedGetMasterCandidateStats(exceptions)
@locking.ssynchronized(_config_lock)
def MaintainCandidatePool(self):
"""
self._config_data.serial_no += 1
+ self._config_data.mtime = time.time()
def _OpenConfig(self):
"""Read the config data from disk.
"""
- f = open(self._cfg_file, 'r')
+ raw_data = utils.ReadFile(self._cfg_file)
+
try:
- try:
- data = objects.ConfigData.FromDict(serializer.Load(f.read()))
- except Exception, err:
- raise errors.ConfigurationError(err)
- finally:
- f.close()
+ data = objects.ConfigData.FromDict(serializer.Load(raw_data))
+ except Exception, err:
+ raise errors.ConfigurationError(err)
# Make sure the configuration has the right version
_ValidateConfig(data)
result = rpc.RpcRunner.call_upload_file(node_list, self._cfg_file,
address_list=addr_list)
- for node in node_list:
- if not result[node]:
- logging.error("copy of file %s to node %s failed",
- self._cfg_file, node)
+ for to_node, to_result in result.items():
+ msg = to_result.fail_msg
+ if msg:
+ msg = ("Copy of file %s to node %s failed: %s" %
+ (self._cfg_file, to_node, msg))
+ logging.error(msg)
bad = True
return not bad
destination = self._cfg_file
self._BumpSerialNo()
txt = serializer.Dump(self._config_data.ToDict())
- dir_name, file_name = os.path.split(destination)
- fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
- f = os.fdopen(fd, 'w')
- try:
- f.write(txt)
- os.fsync(f.fileno())
- finally:
- f.close()
- # we don't need to do os.close(fd) as f.close() did it
- os.rename(name, destination)
+
+ utils.WriteFile(destination, data=txt)
+
self.write_count += 1
# and redistribute the config file to master candidates
# Write ssconf files on all nodes (including locally)
if self._last_cluster_serial < self._config_data.cluster.serial_no:
if not self._offline:
- rpc.RpcRunner.call_write_ssconf_files(self._UnlockedGetNodeList(),
- self._UnlockedGetSsconfValues())
+ result = rpc.RpcRunner.call_write_ssconf_files(\
+ self._UnlockedGetNodeList(),
+ self._UnlockedGetSsconfValues())
+ for nname, nresu in result.items():
+ msg = nresu.fail_msg
+ if msg:
+ logging.warning("Error while uploading ssconf files to"
+ " node %s: %s", nname, msg)
self._last_cluster_serial = self._config_data.cluster.serial_no
def _UnlockedGetSsconfValues(self):
instance_names = utils.NiceSort(self._UnlockedGetInstanceList())
node_names = utils.NiceSort(self._UnlockedGetNodeList())
node_info = [self._UnlockedGetNodeInfo(name) for name in node_names]
+ node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
+ for ninfo in node_info]
+ node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
+ for ninfo in node_info]
instance_data = fn(instance_names)
off_data = fn(node.name for node in node_info if node.offline)
on_data = fn(node.name for node in node_info if not node.offline)
mc_data = fn(node.name for node in node_info if node.master_candidate)
+ mc_ips_data = fn(node.primary_ip for node in node_info
+ if node.master_candidate)
node_data = fn(node_names)
+ node_pri_ips_data = fn(node_pri_ips)
+ node_snd_ips_data = fn(node_snd_ips)
cluster = self._config_data.cluster
+ cluster_tags = fn(cluster.GetTags())
return {
constants.SS_CLUSTER_NAME: cluster.cluster_name,
+ constants.SS_CLUSTER_TAGS: cluster_tags,
constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
constants.SS_MASTER_CANDIDATES: mc_data,
+ constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
constants.SS_MASTER_IP: cluster.master_ip,
constants.SS_MASTER_NETDEV: cluster.master_netdev,
constants.SS_MASTER_NODE: cluster.master_node,
constants.SS_NODE_LIST: node_data,
+ constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
+ constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
constants.SS_OFFLINE_NODES: off_data,
constants.SS_ONLINE_NODES: on_data,
constants.SS_INSTANCE_LIST: instance_data,
constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
}
- @locking.ssynchronized(_config_lock)
- def InitConfig(self, version, cluster_config, master_node_config):
- """Create the initial cluster configuration.
-
- It will contain the current node, which will also be the master
- node, and no instances.
-
- @type version: int
- @param version: Configuration version
- @type cluster_config: objects.Cluster
- @param cluster_config: Cluster configuration
- @type master_node_config: objects.Node
- @param master_node_config: Master node configuration
-
- """
- nodes = {
- master_node_config.name: master_node_config,
- }
-
- self._config_data = objects.ConfigData(version=version,
- cluster=cluster_config,
- nodes=nodes,
- instances={},
- serial_no=1)
- self._WriteConfig()
-
@locking.ssynchronized(_config_lock, shared=1)
def GetVGName(self):
"""Return the volume group name.
self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
- def GetDefBridge(self):
- """Return the default bridge.
-
- """
- return self._config_data.cluster.default_bridge
-
- @locking.ssynchronized(_config_lock, shared=1)
def GetMACPrefix(self):
"""Return the mac prefix.
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterInfo(self):
- """Returns informations about the cluster
+ """Returns information about the cluster
@rtype: L{objects.Cluster}
@return: the cluster object
raise errors.ConfigurationError("Configuration updated since object"
" has been read or unknown object")
target.serial_no += 1
+ target.mtime = now = time.time()
if update_serial:
# for node updates, we need to increase the cluster serial too
self._config_data.cluster.serial_no += 1
+ self._config_data.cluster.mtime = now
if isinstance(target, objects.Instance):
self._UnlockedReleaseDRBDMinors(target.name)