#
#
-# Copyright (C) 2006, 2007 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
"""
+import sys
+import re
import os
-import tempfile
import errno
-import socket
from ganeti import errors
from ganeti import constants
+from ganeti import utils
+from ganeti import serializer
+from ganeti import objects
+from ganeti import netutils
-class SimpleStore:
+SSCONF_LOCK_TIMEOUT = 10
+
+RE_VALID_SSCONF_NAME = re.compile(r"^[-_a-z0-9]+$")
+
+
+class SimpleConfigReader(object):
+ """Simple class to read configuration file.
+
+ The file is parsed with serializer.Load() and the result is kept in
+ memory together with a few lookup tables derived from it (instance IPs
+ per nic link, node and master candidate primary IPs). The getters below
+ only consult this in-memory copy; call L{Reload} to pick up changes made
+ to the file.
+
+ """
+ def __init__(self, file_name=constants.CLUSTER_CONF_FILE):
+ """Initializes this class.
+
+ @type file_name: string
+ @param file_name: Configuration file path
+ @raise errors.ConfigurationError: if the initial load of the file fails
+
+ """
+ self._file_name = file_name
+ # stat values of the file as seen by the last (re)load; _Load compares
+ # them against a fresh os.stat() to decide whether to reload
+ self._last_inode = None
+ self._last_mtime = None
+ self._last_size = None
+
+ # parsed configuration and the lookup tables derived from it; all of
+ # them are (re)built by _Load
+ self._config_data = None
+ self._inst_ips_by_link = None
+ self._ip_to_inst_by_link = None
+ # NOTE(review): _Load resets this to an empty list, but nothing in the
+ # code visible here appends to it or reads it
+ self._instances_ips = None
+ self._mc_primary_ips = None
+ self._nodes_primary_ips = None
+
+ # we need a forced reload at class init time, to initialize _last_*
+ self._Load(force=True)
+
+ def _Load(self, force=False):
+ """Loads (or reloads) the config file.
+
+ @type force: boolean
+ @param force: whether to force the reload without checking the inode,
+ mtime and size of the file
+ @rtype: boolean
+ @return: boolean value that says whether we reloaded the configuration or
+ not (because we decided it was already up-to-date)
+ @raise errors.ConfigurationError: if the file cannot be stat'ed, read
+ or parsed
+
+ """
+ try:
+ cfg_stat = os.stat(self._file_name)
+ except EnvironmentError, err:
+ raise errors.ConfigurationError("Cannot stat config file %s: %s" %
+ (self._file_name, err))
+ inode = cfg_stat.st_ino
+ mtime = cfg_stat.st_mtime
+ size = cfg_stat.st_size
+
+ # Reload when forced, or when the file looks different from the last
+ # load: another inode, a newer mtime or another size. The remembered
+ # values are updated before the file is actually read.
+ if (force or inode != self._last_inode or
+ mtime > self._last_mtime or
+ size != self._last_size):
+ self._last_inode = inode
+ self._last_mtime = mtime
+ self._last_size = size
+ else:
+ # Don't reload
+ return False
+
+ try:
+ self._config_data = serializer.Load(utils.ReadFile(self._file_name))
+ except EnvironmentError, err:
+ raise errors.ConfigurationError("Cannot read config file %s: %s" %
+ (self._file_name, err))
+ except ValueError, err:
+ raise errors.ConfigurationError("Cannot load config file %s: %s" %
+ (self._file_name, err))
+
+ # Rebuild the per-link instance IP tables from scratch
+ self._ip_to_inst_by_link = {}
+ self._instances_ips = []
+ self._inst_ips_by_link = {}
+ # cluster-wide default nic parameters, used to fill in each nic's own
+ # "nicparams" before looking at its link
+ c_nparams = self._config_data["cluster"]["nicparams"][constants.PP_DEFAULT]
+ for iname in self._config_data["instances"]:
+ instance = self._config_data["instances"][iname]
+ for nic in instance["nics"]:
+ # only nics with a non-empty "ip" are indexed
+ if "ip" in nic and nic["ip"]:
+ params = objects.FillDict(c_nparams, nic["nicparams"])
+ # first ip seen on this link: create both per-link containers
+ if not params["link"] in self._inst_ips_by_link:
+ self._inst_ips_by_link[params["link"]] = []
+ self._ip_to_inst_by_link[params["link"]] = {}
+ # a later instance with the same ip on the same link replaces the
+ # earlier mapping here, while the ip is appended to the list again
+ self._ip_to_inst_by_link[params["link"]][nic["ip"]] = iname
+ self._inst_ips_by_link[params["link"]].append(nic["ip"])
+
+ # Rebuild the node primary IP lists (all nodes / master candidates only)
+ self._nodes_primary_ips = []
+ self._mc_primary_ips = []
+ for node_name in self._config_data["nodes"]:
+ node = self._config_data["nodes"][node_name]
+ self._nodes_primary_ips.append(node["primary_ip"])
+ if node["master_candidate"]:
+ self._mc_primary_ips.append(node["primary_ip"])
+
+ return True
+
+ # Clients can request a reload of the config file, so we export our internal
+ # _Load function as Reload.
+ Reload = _Load
+
+ def GetClusterName(self):
+ """Return the "cluster_name" value of the cluster section.
+
+ """
+ return self._config_data["cluster"]["cluster_name"]
+
+ def GetHostKey(self):
+ """Return the "rsahostkeypub" value of the cluster section.
+
+ """
+ return self._config_data["cluster"]["rsahostkeypub"]
+
+ def GetMasterNode(self):
+ """Return the "master_node" value of the cluster section.
+
+ """
+ return self._config_data["cluster"]["master_node"]
+
+ def GetMasterIP(self):
+ """Return the "master_ip" value of the cluster section.
+
+ """
+ return self._config_data["cluster"]["master_ip"]
+
+ def GetMasterNetdev(self):
+ """Return the "master_netdev" value of the cluster section.
+
+ """
+ return self._config_data["cluster"]["master_netdev"]
+
+ def GetMasterNetmask(self):
+ """Return the "master_netmask" value of the cluster section.
+
+ """
+ return self._config_data["cluster"]["master_netmask"]
+
+ def GetFileStorageDir(self):
+ """Return the "file_storage_dir" value of the cluster section.
+
+ """
+ return self._config_data["cluster"]["file_storage_dir"]
+
+ def GetSharedFileStorageDir(self):
+ """Return the "shared_file_storage_dir" value of the cluster section.
+
+ """
+ return self._config_data["cluster"]["shared_file_storage_dir"]
+
+ def GetNodeList(self):
+ """Return the keys of the "nodes" section of the configuration.
+
+ """
+ return self._config_data["nodes"].keys()
+
+ def GetConfigSerialNo(self):
+ """Return the top-level "serial_no" value of the configuration.
+
+ """
+ return self._config_data["serial_no"]
+
+ def GetClusterSerialNo(self):
+ """Return the "serial_no" value of the cluster section.
+
+ """
+ return self._config_data["cluster"]["serial_no"]
+
+ def GetDefaultNicParams(self):
+ """Return the cluster nicparams stored under constants.PP_DEFAULT.
+
+ """
+ return self._config_data["cluster"]["nicparams"][constants.PP_DEFAULT]
+
+ def GetDefaultNicLink(self):
+ """Return the constants.NIC_LINK entry of the default nic parameters.
+
+ """
+ return self.GetDefaultNicParams()[constants.NIC_LINK]
+
+ def GetNodeStatusFlags(self, node):
+ """Get a node's status flags
+
+ @type node: string
+ @param node: node name
+ @rtype: (bool, bool, bool)
+ @return: (master_candidate, drained, offline) (or None if no such node)
+
+ """
+ if node not in self._config_data["nodes"]:
+ return None
+
+ master_candidate = self._config_data["nodes"][node]["master_candidate"]
+ drained = self._config_data["nodes"][node]["drained"]
+ offline = self._config_data["nodes"][node]["offline"]
+ return master_candidate, drained, offline
+
+ def GetInstanceByLinkIp(self, ip, link):
+ """Get instance name from its link and ip address.
+
+ @type ip: string
+ @param ip: ip address
+ @type link: string
+ @param link: nic link; a false value selects the cluster default nic link
+ @rtype: string
+ @return: instance name, or None if nothing is recorded for that link
+ or for that ip on the link
+
+ """
+ if not link:
+ link = self.GetDefaultNicLink()
+ if not link in self._ip_to_inst_by_link:
+ return None
+ if not ip in self._ip_to_inst_by_link[link]:
+ return None
+ return self._ip_to_inst_by_link[link][ip]
+
+ def GetNodePrimaryIp(self, node):
+ """Get a node's primary ip
+
+ @type node: string
+ @param node: node name
+ @rtype: string, or None
+ @return: node's primary ip, or None if no such node
+
+ """
+ if node not in self._config_data["nodes"]:
+ return None
+ return self._config_data["nodes"][node]["primary_ip"]
+
+ def GetInstancePrimaryNode(self, instance):
+ """Get an instance's primary node
+
+ @type instance: string
+ @param instance: instance name
+ @rtype: string, or None
+ @return: primary node, or None if no such instance
+
+ """
+ if instance not in self._config_data["instances"]:
+ return None
+ return self._config_data["instances"][instance]["primary_node"]
+
+ def GetNodesPrimaryIps(self):
+ """Return the list of all nodes' primary IPs built by the last load.
+
+ The internal list object itself is returned, not a copy.
+
+ """
+ return self._nodes_primary_ips
+
+ def GetMasterCandidatesPrimaryIps(self):
+ """Return the list of master candidates' primary IPs built by the last load.
+
+ The internal list object itself is returned, not a copy.
+
+ """
+ return self._mc_primary_ips
+
+ def GetInstancesIps(self, link):
+ """Get list of nic ips connected to a certain link.
+
+ @type link: string
+ @param link: nic link; a false value selects the cluster default nic link
+ @rtype: list
+ @return: list of ips connected to that link (empty if the link is unknown)
+
+ """
+ if not link:
+ link = self.GetDefaultNicLink()
+
+ if link in self._inst_ips_by_link:
+ return self._inst_ips_by_link[link]
+ else:
+ return []
+
+class SimpleStore(object):
"""Interface to static cluster data.
- This is different that the config.ConfigWriter class in that it
- holds data that is (mostly) constant after the cluster
- initialization. Its purpose is to allow limited customization of
- things which would otherwise normally live in constants.py. Note
- that this data cannot live in ConfigWriter as that is available only
- on the master node, and our data must be readable by both the master
- and the nodes.
+ This is different than the config.ConfigWriter and
+ SimpleConfigReader classes in that it holds data that will always be
+ present, even on nodes which don't have all the cluster data.
Other particularities of the datastore:
- keys are restricted to predefined values
- - values are small (<4k)
- - since the data is practically static, read keys are cached in memory
- - some keys are handled specially (read from the system, so
- we can't update them)
"""
_SS_FILEPREFIX = "ssconf_"
- SS_HYPERVISOR = "hypervisor"
- SS_NODED_PASS = "node_pass"
- SS_MASTER_NODE = "master_node"
- SS_MASTER_IP = "master_ip"
- SS_MASTER_NETDEV = "master_netdev"
- SS_CLUSTER_NAME = "cluster_name"
- _VALID_KEYS = (SS_HYPERVISOR, SS_NODED_PASS, SS_MASTER_NODE, SS_MASTER_IP,
- SS_MASTER_NETDEV, SS_CLUSTER_NAME)
- _MAX_SIZE = 4096
+ _VALID_KEYS = (
+ constants.SS_CLUSTER_NAME,
+ constants.SS_CLUSTER_TAGS,
+ constants.SS_FILE_STORAGE_DIR,
+ constants.SS_SHARED_FILE_STORAGE_DIR,
+ constants.SS_MASTER_CANDIDATES,
+ constants.SS_MASTER_CANDIDATES_IPS,
+ constants.SS_MASTER_IP,
+ constants.SS_MASTER_NETDEV,
+ constants.SS_MASTER_NETMASK,
+ constants.SS_MASTER_NODE,
+ constants.SS_NODE_LIST,
+ constants.SS_NODE_PRIMARY_IPS,
+ constants.SS_NODE_SECONDARY_IPS,
+ constants.SS_OFFLINE_NODES,
+ constants.SS_ONLINE_NODES,
+ constants.SS_PRIMARY_IP_FAMILY,
+ constants.SS_INSTANCE_LIST,
+ constants.SS_RELEASE_VERSION,
+ constants.SS_HYPERVISOR_LIST,
+ constants.SS_MAINTAIN_NODE_HEALTH,
+ constants.SS_UID_POOL,
+ constants.SS_NODEGROUPS,
+ )
+ _MAX_SIZE = 131072
def __init__(self, cfg_location=None):
if cfg_location is None:
self._cfg_dir = constants.DATA_DIR
else:
self._cfg_dir = cfg_location
- self._cache = {}
def KeyToFilename(self, key):
"""Convert a given key into filename.
raise errors.ProgrammerError("Invalid key requested from SSConf: '%s'"
% str(key))
- filename = self._cfg_dir + '/' + self._SS_FILEPREFIX + key
+ filename = self._cfg_dir + "/" + self._SS_FILEPREFIX + key
return filename
- def _ReadFile(self, key):
+ def _ReadFile(self, key, default=None):
"""Generic routine to read keys.
This will read the file which holds the value requested. Errors
will be changed into ConfigurationErrors.
"""
- if key in self._cache:
- return self._cache[key]
filename = self.KeyToFilename(key)
try:
- fh = file(filename, 'r')
- try:
- data = fh.readline(self._MAX_SIZE)
- data = data.rstrip('\n')
- finally:
- fh.close()
+ data = utils.ReadFile(filename, size=self._MAX_SIZE)
except EnvironmentError, err:
+ if err.errno == errno.ENOENT and default is not None:
+ return default
raise errors.ConfigurationError("Can't read from the ssconf file:"
" '%s'" % str(err))
- self._cache[key] = data
+ data = data.rstrip("\n")
return data
- def GetNodeDaemonPort(self):
- """Get the node daemon port for this cluster.
+ def WriteFiles(self, values):
+ """Writes ssconf files used by external scripts.
- Note that this routine does not read a ganeti-specific file, but
- instead uses socket.getservbyname to allow pre-customization of
- this parameter outside of ganeti.
+ @type values: dict
+ @param values: Dictionary of (name, value)
"""
+ ssconf_lock = utils.FileLock.Open(constants.SSCONF_LOCK_FILE)
+
+ # Get lock while writing files
+ ssconf_lock.Exclusive(blocking=True, timeout=SSCONF_LOCK_TIMEOUT)
try:
- port = socket.getservbyname("ganeti-noded", "tcp")
- except socket.error:
- port = constants.DEFAULT_NODED_PORT
+ for name, value in values.iteritems():
+ if value and not value.endswith("\n"):
+ value += "\n"
+ if len(value) > self._MAX_SIZE:
+ raise errors.ConfigurationError("ssconf file %s above maximum size" %
+ name)
+ utils.WriteFile(self.KeyToFilename(name), data=value,
+ mode=constants.SS_FILE_PERMS)
+ finally:
+ ssconf_lock.Unlock()
- return port
+ def GetFileList(self):
+ """Return the list of all config files.
- def GetHypervisorType(self):
- """Get the hypervisor type for this cluster.
+ This is used for computing node replication data.
"""
- return self._ReadFile(self.SS_HYPERVISOR)
+ return [self.KeyToFilename(key) for key in self._VALID_KEYS]
- def GetNodeDaemonPassword(self):
- """Get the node password for this cluster.
+ def GetClusterName(self):
+ """Get the cluster name.
"""
- return self._ReadFile(self.SS_NODED_PASS)
+ return self._ReadFile(constants.SS_CLUSTER_NAME)
- def GetMasterNode(self):
- """Get the hostname of the master node for this cluster.
+ def GetFileStorageDir(self):
+ """Get the file storage dir.
+
+ """
+ return self._ReadFile(constants.SS_FILE_STORAGE_DIR)
+
+ def GetSharedFileStorageDir(self):
+ """Get the shared file storage dir.
+
+ """
+ return self._ReadFile(constants.SS_SHARED_FILE_STORAGE_DIR)
+
+ def GetMasterCandidates(self):
+ """Return the list of master candidates.
+
+ """
+ data = self._ReadFile(constants.SS_MASTER_CANDIDATES)
+ nl = data.splitlines(False)
+ return nl
+
+ def GetMasterCandidatesIPList(self):
+ """Return the list of master candidates' primary IP.
"""
- return self._ReadFile(self.SS_MASTER_NODE)
+ data = self._ReadFile(constants.SS_MASTER_CANDIDATES_IPS)
+ nl = data.splitlines(False)
+ return nl
def GetMasterIP(self):
"""Get the IP of the master node for this cluster.
"""
- return self._ReadFile(self.SS_MASTER_IP)
+ return self._ReadFile(constants.SS_MASTER_IP)
def GetMasterNetdev(self):
"""Get the netdev to which we'll add the master ip.
"""
- return self._ReadFile(self.SS_MASTER_NETDEV)
+ return self._ReadFile(constants.SS_MASTER_NETDEV)
- def GetClusterName(self):
- """Get the cluster name.
+ def GetMasterNetmask(self):
+ """Get the master netmask.
"""
- return self._ReadFile(self.SS_CLUSTER_NAME)
+ try:
+ return self._ReadFile(constants.SS_MASTER_NETMASK)
+ except errors.ConfigurationError:
+ family = self.GetPrimaryIPFamily()
+ ipcls = netutils.IPAddress.GetClassFromIpFamily(family)
+ return ipcls.iplen
- def SetKey(self, key, value):
- """Set the value of a key.
+ def GetMasterNode(self):
+ """Get the hostname of the master node for this cluster.
+
+ """
+ return self._ReadFile(constants.SS_MASTER_NODE)
- This should be used only when adding a node to a cluster.
+ def GetNodeList(self):
+ """Return the list of cluster nodes.
"""
- file_name = self.KeyToFilename(key)
- dir_name, small_name = os.path.split(file_name)
- fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
- # here we need to make sure we remove the temp file, if any error
- # leaves it in place
- try:
- os.chown(new_name, 0, 0)
- os.chmod(new_name, 0400)
- os.write(fd, "%s\n" % str(value))
- os.fsync(fd)
- os.rename(new_name, file_name)
- self._cache[key] = value
- finally:
- os.close(fd)
- try:
- os.unlink(new_name)
- except OSError, err:
- if err.errno != errno.ENOENT:
- raise
+ data = self._ReadFile(constants.SS_NODE_LIST)
+ nl = data.splitlines(False)
+ return nl
- def GetFileList(self):
- """Return the lis of all config files.
+ def GetNodePrimaryIPList(self):
+ """Return the list of cluster nodes' primary IP.
- This is used for computing node replication data.
+ """
+ data = self._ReadFile(constants.SS_NODE_PRIMARY_IPS)
+ nl = data.splitlines(False)
+ return nl
+
+ def GetNodeSecondaryIPList(self):
+ """Return the list of cluster nodes' secondary IP.
"""
- return [self.KeyToFilename(key) for key in self._VALID_KEYS]
+ data = self._ReadFile(constants.SS_NODE_SECONDARY_IPS)
+ nl = data.splitlines(False)
+ return nl
+
+ def GetNodegroupList(self):
+ """Return the list of nodegroups.
+
+ """
+ data = self._ReadFile(constants.SS_NODEGROUPS)
+ nl = data.splitlines(False)
+ return nl
+
+ def GetClusterTags(self):
+ """Return the cluster tags.
+
+ """
+ data = self._ReadFile(constants.SS_CLUSTER_TAGS)
+ nl = data.splitlines(False)
+ return nl
+
+ def GetHypervisorList(self):
+ """Return the list of enabled hypervisors.
+
+ """
+ data = self._ReadFile(constants.SS_HYPERVISOR_LIST)
+ nl = data.splitlines(False)
+ return nl
+
+ def GetMaintainNodeHealth(self):
+ """Return the value of the maintain_node_health option.
+
+ """
+ data = self._ReadFile(constants.SS_MAINTAIN_NODE_HEALTH)
+ # we rely on the bool serialization here
+ return data == "True"
+
+ def GetUidPool(self):
+ """Return the user-id pool definition string.
+
+ The separator character is a newline.
+
+ The return value can be parsed using uidpool.ParseUidPool()::
+
+ ss = ssconf.SimpleStore()
+ uid_pool = uidpool.ParseUidPool(ss.GetUidPool(), separator="\\n")
+
+ """
+ data = self._ReadFile(constants.SS_UID_POOL)
+ return data
+
+ def GetPrimaryIPFamily(self):
+ """Return the cluster-wide primary address family.
+
+ """
+ try:
+ return int(self._ReadFile(constants.SS_PRIMARY_IP_FAMILY,
+ default=netutils.IP4Address.family))
+ except (ValueError, TypeError), err:
+ raise errors.ConfigurationError("Error while trying to parse primary ip"
+ " family: %s" % err)
+
+
+def GetMasterAndMyself(ss=None):
+ """Get the master node and my own hostname.
+
+ This can be either used for a 'soft' check (compared to CheckMaster,
+ which exits) or just for computing both at the same time.
+
+ The function does not handle any errors, these should be handled in
+ the caller (errors.ConfigurationError, errors.ResolverError).
+
+ @param ss: either a ssconf.SimpleConfigReader or a
+ ssconf.SimpleStore instance; if None, a new SimpleStore is created
+ @rtype: tuple
+ @return: a tuple (master node name, my own name)
+
+ """
+ # only GetMasterNode() is called on ss, so either class works here
+ if ss is None:
+ ss = SimpleStore()
+ return ss.GetMasterNode(), netutils.Hostname.GetSysName()
+
+
+def CheckMaster(debug, ss=None):
+  """Checks the node setup.
+
+  If this is the master, the function will return. Otherwise it will
+  exit with an exit code based on the node status.
+
+  @type debug: boolean
+  @param debug: whether to report on stderr that this node is not the
+      master before exiting
+  @param ss: passed unchanged to L{GetMasterAndMyself}
+
+  """
+  try:
+    master_name, myself = GetMasterAndMyself(ss)
+  except errors.ConfigurationError, err:
+    # Diagnostics go to stderr like the other messages of this function, so
+    # that stdout stays clean for callers capturing it.
+    sys.stderr.write("Cluster configuration incomplete: '%s'\n" % str(err))
+    sys.exit(constants.EXIT_NODESETUP_ERROR)
+  except errors.ResolverError, err:
+    sys.stderr.write("Cannot resolve my own name (%s)\n" % err.args[0])
+    sys.exit(constants.EXIT_NODESETUP_ERROR)
+
+  if myself != master_name:
+    if debug:
+      sys.stderr.write("Not master, exiting.\n")
+    sys.exit(constants.EXIT_NOTMASTER)