#!/usr/bin/python
#
-# Copyright (C) 2010 Google Inc.
+# Copyright (C) 2010, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
"""
-# pylint: disable-msg=C0103
+# pylint: disable=C0103
# C0103: Invalid name cluster-merge
import logging
from ganeti import errors
from ganeti import ssh
from ganeti import utils
+from ganeti import pathutils
+from ganeti import compat
_GROUPS_MERGE = "merge"
" same name (One of: %s/%s/%s)" %
_RESTART_CHOICES))
-SKIP_STOP_INSTANCES_OPT = cli.cli_option("--skip-stop-instances", default=True,
- action="store_false", dest="stop_instances",
- help=("Don't stop the instances on the"
- " clusters, just check that none"
- " is running"))
+SKIP_STOP_INSTANCES_OPT = \
+ cli.cli_option("--skip-stop-instances", default=True, action="store_false",
+ dest="stop_instances",
+ help=("Don't stop the instances on the clusters, just check "
+ "that none is running"))
def Flatten(unflattened_list):
"""Container class to hold data used for merger.
"""
- def __init__(self, cluster, key_path, nodes, instances, config_path=None):
+ def __init__(self, cluster, key_path, nodes, instances, master_node,
+ config_path=None):
"""Initialize the container.
@param cluster: The name of the cluster
@param key_path: Path to the ssh private key used for authentication
@param nodes: List of online nodes in the merging cluster
@param instances: List of instances running on merging cluster
+ @param master_node: Name of the master node
@param config_path: Path to the merging cluster config
"""
self.key_path = key_path
self.nodes = nodes
self.instances = instances
+ self.master_node = master_node
self.config_path = config_path
"""Handling the merge.
"""
- RUNNING_STATUSES = frozenset([
+ RUNNING_STATUSES = compat.UniqueFrozenset([
constants.INSTST_RUNNING,
constants.INSTST_ERRORUP,
])
+
def __init__(self, clusters, pause_period, groups, restart, params,
stop_instances):
"""Initialize object with sane defaults and infos required.
if self.restart == _RESTART_UP:
raise NotImplementedError
-
def Setup(self):
"""Sets up our end so we can do the merger.
utils.WriteFile(key_path, mode=0600, data=result.stdout)
result = self._RunCmd(cluster, "gnt-node list -o name,offline"
- " --no-header --separator=,", private_key=key_path)
+ " --no-headers --separator=,", private_key=key_path)
if result.failed:
raise errors.RemoteError("Unable to retrieve list of nodes from %s."
" Fail reason: %s; output: %s" %
(cluster, result.fail_reason, result.output))
- nodes_statuses = [line.split(',') for line in result.stdout.splitlines()]
+ nodes_statuses = [line.split(",") for line in result.stdout.splitlines()]
nodes = [node_status[0] for node_status in nodes_statuses
if node_status[1] == "N"]
- result = self._RunCmd(cluster, "gnt-instance list -o name --no-header",
+ result = self._RunCmd(cluster, "gnt-instance list -o name --no-headers",
private_key=key_path)
if result.failed:
raise errors.RemoteError("Unable to retrieve list of instances from"
(cluster, result.fail_reason, result.output))
instances = result.stdout.splitlines()
- self.merger_data.append(MergerData(cluster, key_path, nodes, instances))
+ path = utils.PathJoin(pathutils.DATA_DIR, "ssconf_%s" %
+ constants.SS_MASTER_NODE)
+ result = self._RunCmd(cluster, "cat %s" % path, private_key=key_path)
+ if result.failed:
+ raise errors.RemoteError("Unable to retrieve the master node name from"
+ " %s. Fail reason: %s; output: %s" %
+ (cluster, result.fail_reason, result.output))
+ master_node = result.stdout.strip()
+
+ self.merger_data.append(MergerData(cluster, key_path, nodes, instances,
+ master_node))
def _PrepareAuthorizedKeys(self):
"""Prepare the authorized_keys on every merging node.
"""
for _ in range(max_attempts):
result = self.ssh_runner.Run(hostname=hostname, command=command,
- user=user, use_cluster_key=use_cluster_key,
- strict_host_check=strict_host_check,
- private_key=private_key, batch=batch,
- ask_key=ask_key)
+ user=user, use_cluster_key=use_cluster_key,
+ strict_host_check=strict_host_check,
+ private_key=private_key, batch=batch,
+ ask_key=ask_key)
if not result.failed:
break
" Fail reason: %s; output: %s" %
(cluster, result.fail_reason, result.output))
+ def _RemoveMasterIps(self):
+ """Removes the master IPs from the master nodes of each cluster.
+
+ Runs "gnt-cluster deactivate-master-ip --yes" through self._RunCmd on the
+ master node recorded for every entry in self.merger_data. Processing
+ stops at the first entry for which the command reports a failure.
+
+ @raise errors.RemoteError: If the command fails on a master node
+
+ """
+ for data in self.merger_data:
+ # The command is sent to the mergee's master node (data.master_node),
+ # not to the cluster name (data.cluster)
+ result = self._RunCmd(data.master_node,
+ "gnt-cluster deactivate-master-ip --yes")
+
+ if result.failed:
+ raise errors.RemoteError("Unable to remove master IP on %s."
+ " Fail reason: %s; output: %s" %
+ (data.master_node,
+ result.fail_reason,
+ result.output))
+
def _StopDaemons(self):
"""Stop all daemons on merging nodes.
"""
- cmd = "%s stop-all" % constants.DAEMON_UTIL
+ cmd = "%s stop-all" % pathutils.DAEMON_UTIL
for data in self.merger_data:
for node in data.nodes:
result = self._RunCmd(node, cmd, max_attempts=3)
"""
for data in self.merger_data:
result = self._RunCmd(data.cluster, "cat %s" %
- constants.CLUSTER_CONF_FILE)
+ pathutils.CLUSTER_CONF_FILE)
if result.failed:
raise errors.RemoteError("Unable to retrieve remote config on %s."
utils.WriteFile(data.config_path, data=result.stdout)
# R0201: Method could be a function
- def _KillMasterDaemon(self): # pylint: disable-msg=R0201
+ def _KillMasterDaemon(self): # pylint: disable=R0201
"""Kills the local master daemon.
@raise errors.CommandError: If unable to kill
"""
- result = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"])
+ result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"])
if result.failed:
raise errors.CommandError("Unable to stop master daemons."
" Fail reason: %s; output: %s" %
other_cluster.cluster_name)
# R0201: Method could be a function
- def _GetOsHypervisor(self, cluster, os_name, hyp): # pylint: disable-msg=R0201
+ def _GetOsHypervisor(self, cluster, os_name, hyp): # pylint: disable=R0201
if os_name in cluster.os_hvp:
return cluster.os_hvp[os_name].get(hyp, None)
else:
ConfigWriter.AddNodeGroup takes care of making sure there are no conflicts.
"""
- # pylint: disable-msg=R0201
+ # pylint: disable=R0201
logging.info("Node group conflict strategy: %s", self.groups)
my_grps = my_config.GetAllNodeGroupsInfo().values()
for node_name in other_grp.members[:]:
node = other_config.GetNodeInfo(node_name)
# Access to a protected member of a client class
- # pylint: disable-msg=W0212
+ # pylint: disable=W0212
other_config._UnlockedRemoveNodeFromGroup(node)
# Access to a protected member of a client class
- # pylint: disable-msg=W0212
+ # pylint: disable=W0212
my_grp_uuid = my_config._UnlockedLookupNodeGroup(other_grp.name)
# Access to a protected member of a client class
- # pylint: disable-msg=W0212
+ # pylint: disable=W0212
my_config._UnlockedAddNodeToGroup(node, my_grp_uuid)
node.group = my_grp_uuid
# Remove from list of groups to add
my_config.AddNodeGroup(grp, _CLUSTERMERGE_ECID)
# R0201: Method could be a function
- def _StartMasterDaemon(self, no_vote=False): # pylint: disable-msg=R0201
+ def _StartMasterDaemon(self, no_vote=False): # pylint: disable=R0201
"""Starts the local master daemon.
@param no_vote: Should the masterd started without voting? default: False
if no_vote:
env["EXTRA_MASTERD_ARGS"] = "--no-voting --yes-do-it"
- result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"], env=env)
+ result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env)
if result.failed:
raise errors.CommandError("Couldn't start ganeti master."
" Fail reason: %s; output: %s" %
"""
for data in self.merger_data:
for node in data.nodes:
+ logging.info("Readding node %s", node)
result = utils.RunCmd(["gnt-node", "add", "--readd",
- "--no-ssh-key-check", "--force-join", node])
+ "--no-ssh-key-check", node])
if result.failed:
logging.error("%s failed to be readded. Reason: %s, output: %s",
node, result.fail_reason, result.output)
if result.failed:
raise errors.CommandError("Redistribution failed. Fail reason: %s;"
" output: %s" % (result.fail_reason,
- result.output))
+ result.output))
# R0201: Method could be a function
- def _StartupAllInstances(self): # pylint: disable-msg=R0201
+ def _StartupAllInstances(self): # pylint: disable=R0201
"""Starts up all instances (locally).
@raise errors.CommandError: If unable to start clusters
# R0201: Method could be a function
# TODO: make this overridable, for some verify errors
- def _VerifyCluster(self): # pylint: disable-msg=R0201
+ def _VerifyCluster(self): # pylint: disable=R0201
"""Runs gnt-cluster verify to verify the health.
@raise errors.ProgrammError: If cluster fails on verification
" mergees")
logging.info("Disable watcher")
self._DisableWatcher()
- logging.info("Stop daemons on merging nodes")
- self._StopDaemons()
logging.info("Merging config")
self._FetchRemoteConfig()
+ logging.info("Removing master IPs on mergee master nodes")
+ self._RemoveMasterIps()
+ logging.info("Stop daemons on merging nodes")
+ self._StopDaemons()
logging.info("Stopping master daemon")
self._KillMasterDaemon()
rbsteps.append("Restore %s from another master candidate"
" and restart master daemon" %
- constants.CLUSTER_CONF_FILE)
+ pathutils.CLUSTER_CONF_FILE)
self._MergeConfig()
self._StartMasterDaemon(no_vote=True)
shutil.rmtree(self.work_dir)
-def SetupLogging(options):
- """Setting up logging infrastructure.
-
- @param options: Parsed command line options
-
- """
- formatter = logging.Formatter("%(asctime)s: %(levelname)s %(message)s")
-
- stderr_handler = logging.StreamHandler()
- stderr_handler.setFormatter(formatter)
- if options.debug:
- stderr_handler.setLevel(logging.NOTSET)
- elif options.verbose:
- stderr_handler.setLevel(logging.INFO)
- else:
- stderr_handler.setLevel(logging.WARNING)
-
- root_logger = logging.getLogger("")
- root_logger.setLevel(logging.NOTSET)
- root_logger.addHandler(stderr_handler)
-
-
def main():
"""Main routine.
(options, args) = parser.parse_args()
- SetupLogging(options)
+ utils.SetupToolLogging(options.debug, options.verbose)
if not args:
parser.error("No clusters specified")