X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/23269c5bbc9d6977ab7cbe570e77d51c126e81b6..8e66b9bfff134a6190aebdbe786a102778b3ecec:/qa/qa_daemon.py diff --git a/qa/qa_daemon.py b/qa/qa_daemon.py index 8b40edf..6b0ce93 100644 --- a/qa/qa_daemon.py +++ b/qa/qa_daemon.py @@ -1,4 +1,7 @@ -# Copyright (C) 2007 Google Inc. +# +# + +# Copyright (C) 2007, 2008, 2009, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -29,117 +32,116 @@ import qa_config import qa_utils import qa_error -from qa_utils import AssertEqual, StartSSH +from qa_utils import AssertMatch, AssertCommand, StartSSH, GetCommandOutput -def _InstanceRunning(node, name): +def _InstanceRunning(name): """Checks whether an instance is running. - Args: - node: Node the instance runs on - name: Full name of Xen instance + @param name: full name of the instance + """ - cmd = utils.ShellQuoteArgs(['xm', 'list', name]) + ' >/dev/null' - ret = StartSSH(node['primary'], cmd).wait() + master = qa_config.GetMasterNode() + + cmd = (utils.ShellQuoteArgs(["gnt-instance", "list", "-o", "status", name]) + + ' | grep running') + ret = StartSSH(master["primary"], cmd).wait() return ret == 0 -def _XmShutdownInstance(node, name): - """Shuts down instance using "xm" and waits for completion. +def _ShutdownInstance(name): + """Shuts down instance without recording state and waits for completion. - Args: - node: Node the instance runs on - name: Full name of Xen instance - """ - master = qa_config.GetMasterNode() + @param name: full name of the instance - cmd = ['xm', 'shutdown', name] - AssertEqual(StartSSH(node['primary'], - utils.ShellQuoteArgs(cmd)).wait(), 0) + """ + AssertCommand(["gnt-instance", "shutdown", "--no-remember", name]) - # Wait up to a minute - end = time.time() + 60 - while time.time() <= end: - if not _InstanceRunning(node, name): - break - time.sleep(5) - else: - raise qa_error.Error("xm shutdown failed") + if _InstanceRunning(name): + raise qa_error.Error("instance shutdown failed") -def _ResetWatcherDaemon(node): +def _ResetWatcherDaemon(): """Removes the watcher daemon's state file. - Args: - node: Node to be reset """ - cmd = ['rm', '-f', constants.WATCHER_STATEFILE] - AssertEqual(StartSSH(node['primary'], - utils.ShellQuoteArgs(cmd)).wait(), 0) + AssertCommand([ + "bash", "-c", + "rm -vf %s" % (constants.WATCHER_GROUP_STATE_FILE % "*-*-*-*") + ]) -def PrintCronWarning(): - """Shows a warning about the required cron job. +def _RunWatcherDaemon(): + """Runs the ganeti-watcher daemon on the master node. """ - print - qa_utils.PrintWarning("The following tests require the cron script for" - " ganeti-watcher to be set up.") + AssertCommand(["ganeti-watcher", "-d", "--ignore-pause", "--wait-children"]) -def TestInstanceAutomaticRestart(node, instance): - """Test automatic restart of instance by ganeti-watcher. +def TestPauseWatcher(): + """Tests and pauses the watcher. - Note: takes up to 6 minutes to complete. """ master = qa_config.GetMasterNode() - inst_name = qa_utils.ResolveInstanceName(instance) - _ResetWatcherDaemon(node) - _XmShutdownInstance(node, inst_name) + AssertCommand(["gnt-cluster", "watcher", "pause", "4h"]) - # Give it a bit more than five minutes to start again - restart_at = time.time() + 330 + cmd = ["gnt-cluster", "watcher", "info"] + output = GetCommandOutput(master["primary"], + utils.ShellQuoteArgs(cmd)) + AssertMatch(output, r"^.*\bis paused\b.*") - # Wait until it's running again - while time.time() <= restart_at: - if _InstanceRunning(node, inst_name): - break - time.sleep(15) - else: - raise qa_error.Error("Daemon didn't restart instance in time") - cmd = ['gnt-instance', 'info', inst_name] - AssertEqual(StartSSH(master['primary'], - utils.ShellQuoteArgs(cmd)).wait(), 0) +def TestResumeWatcher(): + """Tests and unpauses the watcher. + """ + master = qa_config.GetMasterNode() -def TestInstanceConsecutiveFailures(node, instance): - """Test five consecutive instance failures. + AssertCommand(["gnt-cluster", "watcher", "continue"]) + + cmd = ["gnt-cluster", "watcher", "info"] + output = GetCommandOutput(master["primary"], + utils.ShellQuoteArgs(cmd)) + AssertMatch(output, r"^.*\bis not paused\b.*") + + +def TestInstanceAutomaticRestart(instance): + """Test automatic restart of instance by ganeti-watcher. - Note: takes at least 35 minutes to complete. """ - master = qa_config.GetMasterNode() - inst_name = qa_utils.ResolveInstanceName(instance) + inst_name = qa_utils.ResolveInstanceName(instance["name"]) + + _ResetWatcherDaemon() + _ShutdownInstance(inst_name) + + _RunWatcherDaemon() + time.sleep(5) - _ResetWatcherDaemon(node) - _XmShutdownInstance(node, inst_name) + if not _InstanceRunning(inst_name): + raise qa_error.Error("Daemon didn't restart instance") - # Do shutdowns for 30 minutes - finished_at = time.time() + (35 * 60) + AssertCommand(["gnt-instance", "info", inst_name]) - while time.time() <= finished_at: - if _InstanceRunning(node, inst_name): - _XmShutdownInstance(node, inst_name) - time.sleep(30) - # Check for some time whether the instance doesn't start again - check_until = time.time() + 330 - while time.time() <= check_until: - if _InstanceRunning(node, inst_name): - raise qa_error.Error("Instance started when it shouldn't") - time.sleep(30) +def TestInstanceConsecutiveFailures(instance): + """Test five consecutive instance failures. + + """ + inst_name = qa_utils.ResolveInstanceName(instance["name"]) + + _ResetWatcherDaemon() + + for should_start in ([True] * 5) + [False]: + _ShutdownInstance(inst_name) + _RunWatcherDaemon() + time.sleep(5) + + if bool(_InstanceRunning(inst_name)) != should_start: + if should_start: + msg = "Instance not started when it should" + else: + msg = "Instance started when it shouldn't" + raise qa_error.Error(msg) - cmd = ['gnt-instance', 'info', inst_name] - AssertEqual(StartSSH(master['primary'], - utils.ShellQuoteArgs(cmd)).wait(), 0) + AssertCommand(["gnt-instance", "info", inst_name])