X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/cff4c03758236ac90233fe0937e91d2acde145ad..94a02bb5541fc7378da1f382665847e1f6cff684:/lib/config.py diff --git a/lib/config.py b/lib/config.py index 739f761..986b03a 100644 --- a/lib/config.py +++ b/lib/config.py @@ -49,6 +49,14 @@ _config_lock = locking.SharedLock() def _ValidateConfig(data): + """Verifies that a configuration objects looks valid. + + This only verifies the version of the configuration. + + @raise errors.ConfigurationError: if the version differs from what + we expect + + """ if data.version != constants.CONFIG_VERSION: raise errors.ConfigurationError("Cluster configuration version" " mismatch, got %s instead of %s" % @@ -76,6 +84,7 @@ class ConfigWriter: # better to raise an error before starting to modify the config # file than after it was modified self._my_hostname = utils.HostInfo().name + self._last_cluster_serial = -1 self._OpenConfig() # this method needs to be static, so that we can call it on the class @@ -155,13 +164,13 @@ class ConfigWriter: This checks the current node, instances and disk names for duplicates. - Args: - - exceptions: a list with some other names which should be checked - for uniqueness (used for example when you want to get - more than one id at one time without adding each one in - turn to the config file + @param exceptions: a list with some other names which should be checked + for uniqueness (used for example when you want to get + more than one id at one time without adding each one in + turn to the config file) - Returns: the unique id as a string + @rtype: string + @return: the unique id """ existing = set() @@ -185,6 +194,9 @@ class ConfigWriter: def _AllMACs(self): """Return all MACs present in the config. + @rtype: list + @return: the list of all MACs + """ result = [] for instance in self._config_data.instances.values(): @@ -196,6 +208,9 @@ class ConfigWriter: def _AllDRBDSecrets(self): """Return all DRBD secrets present in the config. + @rtype: list + @return: the list of all DRBD secrets + """ def helper(disk, result): """Recursively gather secrets from this disk.""" @@ -214,7 +229,8 @@ class ConfigWriter: @locking.ssynchronized(_config_lock, shared=1) def VerifyConfig(self): - """Stub verify function. + """Verify function. + """ result = [] seen_macs = [] @@ -251,7 +267,7 @@ class ConfigWriter: ports[net_port].append((instance.name, "network port")) # cluster-wide pool of free ports - for free_port in self._config_data.cluster.tcpudp_port_pool: + for free_port in data.cluster.tcpudp_port_pool: if free_port not in ports: ports[free_port] = [] ports[free_port].append(("cluster", "port marked as free")) @@ -267,10 +283,17 @@ class ConfigWriter: # highest used tcp port check if keys: - if keys[-1] > self._config_data.cluster.highest_used_port: + if keys[-1] > data.cluster.highest_used_port: result.append("Highest used port mismatch, saved %s, computed %s" % - (self._config_data.cluster.highest_used_port, - keys[-1])) + (data.cluster.highest_used_port, keys[-1])) + + if not data.nodes[data.cluster.master_node].master_candidate: + result.append("Master node is not a master candidate") + + mc_now, mc_max = self._UnlockedGetMasterCandidateStats() + if mc_now < mc_max: + result.append("Not enough master candidates: actual %d, target %d" % + (mc_now, mc_max)) return result @@ -369,9 +392,9 @@ class ConfigWriter: def _ComputeDRBDMap(self, instance): """Compute the used DRBD minor/nodes. - Return: dictionary of node_name: dict of minor: instance_name. The - returned dict will have all the nodes in it (even if with an empty - list). + @return: dictionary of node_name: dict of minor: instance_name; + the returned dict will have all the nodes in it (even if with + an empty list). """ def _AppendUsedPorts(instance_name, disk, used): @@ -513,9 +536,9 @@ class ConfigWriter: def GetHostKey(self): """Return the rsa hostkey from the config. - Args: None + @rtype: string + @return: the rsa hostkey - Returns: rsa hostkey """ return self._config_data.cluster.rsahostkeypub @@ -525,8 +548,9 @@ class ConfigWriter: This should be used after creating a new instance. - Args: - instance: the instance object + @type instance: L{objects.Instance} + @param instance: the instance object + """ if not isinstance(instance, objects.Instance): raise errors.ProgrammerError("Invalid type passed to AddInstance") @@ -620,9 +644,8 @@ class ConfigWriter: def GetInstanceList(self): """Get the list of instances. - Returns: - array of instances, ex. ['instance2.example.com','instance1.example.com'] - these contains all the instances, also the ones in Admin_down state + @return: array of instances, ex. ['instance2.example.com', + 'instance1.example.com'] """ return self._UnlockedGetInstanceList() @@ -653,11 +676,11 @@ class ConfigWriter: It takes the information from the configuration file. Other informations of an instance are taken from the live systems. - Args: - instance: name of the instance, ex instance1.example.com + @param instance_name: name of the instance, e.g. + I{instance1.example.com} - Returns: - the instance object + @rtype: L{objects.Instance} + @return: the instance object """ return self._UnlockedGetInstanceInfo(instance_name) @@ -679,8 +702,8 @@ class ConfigWriter: def AddNode(self, node): """Add a node to the configuration. - Args: - node: an object.Node instance + @type node: L{objects.Node} + @param node: a Node instance """ logging.info("Adding node %s to configuration" % node.name) @@ -715,11 +738,13 @@ class ConfigWriter: def _UnlockedGetNodeInfo(self, node_name): """Get the configuration of a node, as stored in the config. - This function is for internal use, when the config lock is already held. + This function is for internal use, when the config lock is already + held. - Args: node: nodename (tuple) of the node + @param node_name: the node name, e.g. I{node1.example.com} - Returns: the node object + @rtype: L{objects.Node} + @return: the node object """ if node_name not in self._config_data.nodes: @@ -732,9 +757,12 @@ class ConfigWriter: def GetNodeInfo(self, node_name): """Get the configuration of a node, as stored in the config. - Args: node: nodename (tuple) of the node + This is just a locked wrapper over L{_UnlockedGetNodeInfo}. - Returns: the node object + @param node_name: the node name, e.g. I{node1.example.com} + + @rtype: L{objects.Node} + @return: the node object """ return self._UnlockedGetNodeInfo(node_name) @@ -742,7 +770,10 @@ class ConfigWriter: def _UnlockedGetNodeList(self): """Return the list of nodes which are in the configuration. - This function is for internal use, when the config lock is already held. + This function is for internal use, when the config lock is already + held. + + @rtype: list """ return self._config_data.nodes.keys() @@ -756,11 +787,20 @@ class ConfigWriter: return self._UnlockedGetNodeList() @locking.ssynchronized(_config_lock, shared=1) + def GetOnlineNodeList(self): + """Return the list of nodes which are online. + + """ + all_nodes = [self._UnlockedGetNodeInfo(node) + for node in self._UnlockedGetNodeList()] + return [node.name for node in all_nodes if not node.offline] + + @locking.ssynchronized(_config_lock, shared=1) def GetAllNodesInfo(self): """Get the configuration of all nodes. @rtype: dict - @returns: dict of (node, node_info), where node_info is what + @return: dict of (node, node_info), where node_info is what would GetNodeInfo return for the node """ @@ -768,6 +808,67 @@ class ConfigWriter: for node in self._UnlockedGetNodeList()]) return my_dict + def _UnlockedGetMasterCandidateStats(self): + """Get the number of current and maximum desired and possible candidates. + + @rtype: tuple + @return: tuple of (current, desired and possible) + + """ + mc_now = mc_max = 0 + for node in self._config_data.nodes.itervalues(): + if not node.offline: + mc_max += 1 + if node.master_candidate: + mc_now += 1 + mc_max = min(mc_max, self._config_data.cluster.candidate_pool_size) + return (mc_now, mc_max) + + @locking.ssynchronized(_config_lock, shared=1) + def GetMasterCandidateStats(self): + """Get the number of current and maximum possible candidates. + + This is just a wrapper over L{_UnlockedGetMasterCandidateStats}. + + @rtype: tuple + @return: tuple of (current, max) + + """ + return self._UnlockedGetMasterCandidateStats() + + @locking.ssynchronized(_config_lock) + def MaintainCandidatePool(self): + """Try to grow the candidate pool to the desired size. + + @rtype: list + @return: list with the adjusted nodes (L{objects.Node} instances) + + """ + mc_now, mc_max = self._UnlockedGetMasterCandidateStats() + mod_list = [] + if mc_now < mc_max: + node_list = self._config_data.nodes.keys() + random.shuffle(node_list) + for name in node_list: + if mc_now >= mc_max: + break + node = self._config_data.nodes[name] + if node.master_candidate or node.offline: + continue + mod_list.append(node) + node.master_candidate = True + node.serial_no += 1 + mc_now += 1 + if mc_now != mc_max: + # this should not happen + logging.warning("Warning: MaintainCandidatePool didn't manage to" + " fill the candidate pool (%d/%d)", mc_now, mc_max) + if mod_list: + self._config_data.cluster.serial_no += 1 + self._WriteConfig() + + return mod_list + def _BumpSerialNo(self): """Bump up the serial number of the config. @@ -777,10 +878,6 @@ class ConfigWriter: def _OpenConfig(self): """Read the config data from disk. - In case we already have configuration data and the config file has - the same mtime as when we read it, we skip the parsing of the - file, since de-serialisation could be slow. - """ f = open(self._cfg_file, 'r') try: @@ -799,7 +896,7 @@ class ConfigWriter: raise errors.ConfigurationError("Incomplete configuration" " (missing cluster.rsahostkeypub)") self._config_data = data - # init the last serial as -1 so that the next write will cause + # reset the last serial as -1 so that the next write will cause # ssconf update self._last_cluster_serial = -1 @@ -813,23 +910,26 @@ class ConfigWriter: if self._offline: return True bad = False - nodelist = self._UnlockedGetNodeList() - myhostname = self._my_hostname - try: - nodelist.remove(myhostname) - except ValueError: - pass + node_list = [] + addr_list = [] + myhostname = self._my_hostname # we can skip checking whether _UnlockedGetNodeInfo returns None # since the node list comes from _UnlocketGetNodeList, and we are # called with the lock held, so no modifications should take place # in between - address_list = [self._UnlockedGetNodeInfo(name).primary_ip - for name in nodelist] + for node_name in self._UnlockedGetNodeList(): + if node_name == myhostname: + continue + node_info = self._UnlockedGetNodeInfo(node_name) + if not node_info.master_candidate: + continue + node_list.append(node_info.name) + addr_list.append(node_info.primary_ip) - result = rpc.RpcRunner.call_upload_file(nodelist, self._cfg_file, - address_list=address_list) - for node in nodelist: + result = rpc.RpcRunner.call_upload_file(node_list, self._cfg_file, + address_list=addr_list) + for node in node_list: if not result[node]: logging.error("copy of file %s to node %s failed", self._cfg_file, node) @@ -856,7 +956,7 @@ class ConfigWriter: os.rename(name, destination) self.write_count += 1 - # and redistribute the config file + # and redistribute the config file to master candidates self._DistributeConfig() # Write ssconf files on all nodes (including locally) @@ -867,11 +967,31 @@ class ConfigWriter: self._last_cluster_serial = self._config_data.cluster.serial_no def _UnlockedGetSsconfValues(self): + """Return the values needed by ssconf. + + @rtype: dict + @return: a dictionary with keys the ssconf names and values their + associated value + + """ + fn = "\n".join + node_names = utils.NiceSort(self._UnlockedGetNodeList()) + node_info = [self._UnlockedGetNodeInfo(name) for name in node_names] + + off_data = fn(node.name for node in node_info if node.offline) + mc_data = fn(node.name for node in node_info if node.master_candidate) + node_data = fn(node_names) + + cluster = self._config_data.cluster return { - "cluster_name": self._config_data.cluster.cluster_name, - "master_ip": self._config_data.cluster.master_ip, - "master_netdev": self._config_data.cluster.master_netdev, - "master_node": self._config_data.cluster.master_node, + constants.SS_CLUSTER_NAME: cluster.cluster_name, + constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir, + constants.SS_MASTER_CANDIDATES: mc_data, + constants.SS_MASTER_IP: cluster.master_ip, + constants.SS_MASTER_NETDEV: cluster.master_netdev, + constants.SS_MASTER_NODE: cluster.master_node, + constants.SS_NODE_LIST: node_data, + constants.SS_OFFLINE_NODES: off_data, } @locking.ssynchronized(_config_lock) @@ -934,8 +1054,8 @@ class ConfigWriter: def GetClusterInfo(self): """Returns informations about the cluster - Returns: - the cluster object + @rtype: L{objects.Cluster} + @return: the cluster object """ return self._config_data.cluster @@ -950,25 +1070,32 @@ class ConfigWriter: that all modified objects will be saved, but the target argument is the one the caller wants to ensure that it's saved. + @param target: an instance of either L{objects.Cluster}, + L{objects.Node} or L{objects.Instance} which is existing in + the cluster + """ if self._config_data is None: raise errors.ProgrammerError("Configuration file not read," " cannot save.") - update_serial = True + update_serial = False if isinstance(target, objects.Cluster): test = target == self._config_data.cluster elif isinstance(target, objects.Node): test = target in self._config_data.nodes.values() + update_serial = True elif isinstance(target, objects.Instance): test = target in self._config_data.instances.values() - update_serial = False else: raise errors.ProgrammerError("Invalid object type (%s) passed to" " ConfigWriter.Update" % type(target)) if not test: raise errors.ConfigurationError("Configuration updated since object" " has been read or unknown object") + target.serial_no += 1 + if update_serial: - target.serial_no += 1 + # for node updates, we need to increase the cluster serial too + self._config_data.cluster.serial_no += 1 self._WriteConfig()