X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/1fb5f905797aa33f53e929af6f57d427a1067e23..43ffd83959093e1cd56c45bf7eac1c86d77b8ee7:/tools/cluster-merge diff --git a/tools/cluster-merge b/tools/cluster-merge index a76aec1..a4787d4 100755 --- a/tools/cluster-merge +++ b/tools/cluster-merge @@ -1,7 +1,7 @@ #!/usr/bin/python # -# Copyright (C) 2010 Google Inc. +# Copyright (C) 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ The clusters have to run the same version of Ganeti! """ -# pylint: disable-msg=C0103 +# pylint: disable=C0103 # C0103: Invalid name cluster-merge import logging @@ -40,6 +40,8 @@ from ganeti import constants from ganeti import errors from ganeti import ssh from ganeti import utils +from ganeti import pathutils +from ganeti import compat _GROUPS_MERGE = "merge" @@ -81,12 +83,11 @@ RESTART_OPT = cli.cli_option("--restart", default=_RESTART_ALL, " same name (One of: %s/%s/%s)" % _RESTART_CHOICES)) -SKIP_STOP_INSTANCES_OPT = cli.cli_option("--skip-stop-instances", default=True, - action="store_false", type="boolean", - dest="stop_instances", - help=("Don't stop the instances on the" - " clusters, but just to check" - " that none is running")) +SKIP_STOP_INSTANCES_OPT = \ + cli.cli_option("--skip-stop-instances", default=True, action="store_false", + dest="stop_instances", + help=("Don't stop the instances on the clusters, just check " + "that none is running")) def Flatten(unflattened_list): @@ -110,13 +111,15 @@ class MergerData(object): """Container class to hold data used for merger. """ - def __init__(self, cluster, key_path, nodes, instances, config_path=None): + def __init__(self, cluster, key_path, nodes, instances, master_node, + config_path=None): """Initialize the container. @param cluster: The name of the cluster @param key_path: Path to the ssh private key used for authentication @param nodes: List of online nodes in the merging cluster @param instances: List of instances running on merging cluster + @param master_node: Name of the master node @param config_path: Path to the merging cluster config """ @@ -124,6 +127,7 @@ class MergerData(object): self.key_path = key_path self.nodes = nodes self.instances = instances + self.master_node = master_node self.config_path = config_path @@ -131,10 +135,11 @@ class Merger(object): """Handling the merge. """ - RUNNING_STATUSES = frozenset([ + RUNNING_STATUSES = compat.UniqueFrozenset([ constants.INSTST_RUNNING, constants.INSTST_ERRORUP, ]) + def __init__(self, clusters, pause_period, groups, restart, params, stop_instances): """Initialize object with sane defaults and infos required. @@ -161,7 +166,6 @@ class Merger(object): if self.restart == _RESTART_UP: raise NotImplementedError - def Setup(self): """Sets up our end so we can do the merger. @@ -190,16 +194,16 @@ class Merger(object): utils.WriteFile(key_path, mode=0600, data=result.stdout) result = self._RunCmd(cluster, "gnt-node list -o name,offline" - " --no-header --separator=,", private_key=key_path) + " --no-headers --separator=,", private_key=key_path) if result.failed: raise errors.RemoteError("Unable to retrieve list of nodes from %s." " Fail reason: %s; output: %s" % (cluster, result.fail_reason, result.output)) - nodes_statuses = [line.split(',') for line in result.stdout.splitlines()] + nodes_statuses = [line.split(",") for line in result.stdout.splitlines()] nodes = [node_status[0] for node_status in nodes_statuses if node_status[1] == "N"] - result = self._RunCmd(cluster, "gnt-instance list -o name --no-header", + result = self._RunCmd(cluster, "gnt-instance list -o name --no-headers", private_key=key_path) if result.failed: raise errors.RemoteError("Unable to retrieve list of instances from" @@ -207,7 +211,17 @@ class Merger(object): (cluster, result.fail_reason, result.output)) instances = result.stdout.splitlines() - self.merger_data.append(MergerData(cluster, key_path, nodes, instances)) + path = utils.PathJoin(pathutils.DATA_DIR, "ssconf_%s" % + constants.SS_MASTER_NODE) + result = self._RunCmd(cluster, "cat %s" % path, private_key=key_path) + if result.failed: + raise errors.RemoteError("Unable to retrieve the master node name from" + " %s. Fail reason: %s; output: %s" % + (cluster, result.fail_reason, result.output)) + master_node = result.stdout.strip() + + self.merger_data.append(MergerData(cluster, key_path, nodes, instances, + master_node)) def _PrepareAuthorizedKeys(self): """Prepare the authorized_keys on every merging node. @@ -241,10 +255,10 @@ class Merger(object): """ for _ in range(max_attempts): result = self.ssh_runner.Run(hostname=hostname, command=command, - user=user, use_cluster_key=use_cluster_key, - strict_host_check=strict_host_check, - private_key=private_key, batch=batch, - ask_key=ask_key) + user=user, use_cluster_key=use_cluster_key, + strict_host_check=strict_host_check, + private_key=private_key, batch=batch, + ask_key=ask_key) if not result.failed: break @@ -259,7 +273,7 @@ class Merger(object): """ for cluster in self.clusters: result = self._RunCmd(cluster, "gnt-instance list -o status") - if self.RUNNING_STATUSES.intersect(result.output.splitlines()): + if self.RUNNING_STATUSES.intersection(result.output.splitlines()): return True return False @@ -290,11 +304,26 @@ class Merger(object): " Fail reason: %s; output: %s" % (cluster, result.fail_reason, result.output)) + def _RemoveMasterIps(self): + """Removes the master IPs from the master nodes of each cluster. + + """ + for data in self.merger_data: + result = self._RunCmd(data.master_node, + "gnt-cluster deactivate-master-ip --yes") + + if result.failed: + raise errors.RemoteError("Unable to remove master IP on %s." + " Fail reason: %s; output: %s" % + (data.master_node, + result.fail_reason, + result.output)) + def _StopDaemons(self): """Stop all daemons on merging nodes. """ - cmd = "%s stop-all" % constants.DAEMON_UTIL + cmd = "%s stop-all" % pathutils.DAEMON_UTIL for data in self.merger_data: for node in data.nodes: result = self._RunCmd(node, cmd, max_attempts=3) @@ -312,7 +341,7 @@ class Merger(object): """ for data in self.merger_data: result = self._RunCmd(data.cluster, "cat %s" % - constants.CLUSTER_CONF_FILE) + pathutils.CLUSTER_CONF_FILE) if result.failed: raise errors.RemoteError("Unable to retrieve remote config on %s." @@ -325,13 +354,13 @@ class Merger(object): utils.WriteFile(data.config_path, data=result.stdout) # R0201: Method could be a function - def _KillMasterDaemon(self): # pylint: disable-msg=R0201 + def _KillMasterDaemon(self): # pylint: disable=R0201 """Kills the local master daemon. @raise errors.CommandError: If unable to kill """ - result = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"]) + result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"]) if result.failed: raise errors.CommandError("Unable to stop master daemons." " Fail reason: %s; output: %s" % @@ -525,7 +554,7 @@ class Merger(object): other_cluster.cluster_name) # R0201: Method could be a function - def _GetOsHypervisor(self, cluster, os_name, hyp): # pylint: disable-msg=R0201 + def _GetOsHypervisor(self, cluster, os_name, hyp): # pylint: disable=R0201 if os_name in cluster.os_hvp: return cluster.os_hvp[os_name].get(hyp, None) else: @@ -537,7 +566,7 @@ class Merger(object): ConfigWriter.AddNodeGroup takes care of making sure there are no conflicts. """ - # pylint: disable-msg=R0201 + # pylint: disable=R0201 logging.info("Node group conflict strategy: %s", self.groups) my_grps = my_config.GetAllNodeGroupsInfo().values() @@ -577,15 +606,15 @@ class Merger(object): for node_name in other_grp.members[:]: node = other_config.GetNodeInfo(node_name) # Access to a protected member of a client class - # pylint: disable-msg=W0212 + # pylint: disable=W0212 other_config._UnlockedRemoveNodeFromGroup(node) # Access to a protected member of a client class - # pylint: disable-msg=W0212 + # pylint: disable=W0212 my_grp_uuid = my_config._UnlockedLookupNodeGroup(other_grp.name) # Access to a protected member of a client class - # pylint: disable-msg=W0212 + # pylint: disable=W0212 my_config._UnlockedAddNodeToGroup(node, my_grp_uuid) node.group = my_grp_uuid # Remove from list of groups to add @@ -596,7 +625,7 @@ class Merger(object): my_config.AddNodeGroup(grp, _CLUSTERMERGE_ECID) # R0201: Method could be a function - def _StartMasterDaemon(self, no_vote=False): # pylint: disable-msg=R0201 + def _StartMasterDaemon(self, no_vote=False): # pylint: disable=R0201 """Starts the local master daemon. @param no_vote: Should the masterd started without voting? default: False @@ -607,7 +636,7 @@ class Merger(object): if no_vote: env["EXTRA_MASTERD_ARGS"] = "--no-voting --yes-do-it" - result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"], env=env) + result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env) if result.failed: raise errors.CommandError("Couldn't start ganeti master." " Fail reason: %s; output: %s" % @@ -621,8 +650,9 @@ class Merger(object): """ for data in self.merger_data: for node in data.nodes: + logging.info("Readding node %s", node) result = utils.RunCmd(["gnt-node", "add", "--readd", - "--no-ssh-key-check", "--force-join", node]) + "--no-ssh-key-check", node]) if result.failed: logging.error("%s failed to be readded. Reason: %s, output: %s", node, result.fail_reason, result.output) @@ -631,10 +661,10 @@ class Merger(object): if result.failed: raise errors.CommandError("Redistribution failed. Fail reason: %s;" " output: %s" % (result.fail_reason, - result.output)) + result.output)) # R0201: Method could be a function - def _StartupAllInstances(self): # pylint: disable-msg=R0201 + def _StartupAllInstances(self): # pylint: disable=R0201 """Starts up all instances (locally). @raise errors.CommandError: If unable to start clusters @@ -649,7 +679,7 @@ class Merger(object): # R0201: Method could be a function # TODO: make this overridable, for some verify errors - def _VerifyCluster(self): # pylint: disable-msg=R0201 + def _VerifyCluster(self): # pylint: disable=R0201 """Runs gnt-cluster verify to verify the health. @raise errors.ProgrammError: If cluster fails on verification @@ -690,17 +720,19 @@ class Merger(object): " mergees") logging.info("Disable watcher") self._DisableWatcher() - logging.info("Stop daemons on merging nodes") - self._StopDaemons() logging.info("Merging config") self._FetchRemoteConfig() + logging.info("Removing master IPs on mergee master nodes") + self._RemoveMasterIps() + logging.info("Stop daemons on merging nodes") + self._StopDaemons() logging.info("Stopping master daemon") self._KillMasterDaemon() rbsteps.append("Restore %s from another master candidate" " and restart master daemon" % - constants.CLUSTER_CONF_FILE) + pathutils.CLUSTER_CONF_FILE) self._MergeConfig() self._StartMasterDaemon(no_vote=True) @@ -750,28 +782,6 @@ class Merger(object): shutil.rmtree(self.work_dir) -def SetupLogging(options): - """Setting up logging infrastructure. - - @param options: Parsed command line options - - """ - formatter = logging.Formatter("%(asctime)s: %(levelname)s %(message)s") - - stderr_handler = logging.StreamHandler() - stderr_handler.setFormatter(formatter) - if options.debug: - stderr_handler.setLevel(logging.NOTSET) - elif options.verbose: - stderr_handler.setLevel(logging.INFO) - else: - stderr_handler.setLevel(logging.WARNING) - - root_logger = logging.getLogger("") - root_logger.setLevel(logging.NOTSET) - root_logger.addHandler(stderr_handler) - - def main(): """Main routine. @@ -790,7 +800,7 @@ def main(): (options, args) = parser.parse_args() - SetupLogging(options) + utils.SetupToolLogging(options.debug, options.verbose) if not args: parser.error("No clusters specified")