-# Copyright (C) 2007 Google Inc.
+#
+#
+
+# Copyright (C) 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
import qa_utils
import qa_error
-from qa_utils import AssertEqual, StartSSH
+from qa_utils import AssertMatch, AssertCommand, StartSSH, GetCommandOutput
-def _InstanceRunning(node, name):
+def _InstanceRunning(name):
"""Checks whether an instance is running.
- Args:
- node: Node the instance runs on
- name: Full name of Xen instance
+ @param name: full name of the instance
+ @return: True if the status query piped through "grep running" exits
+ with status 0 on the master node, False otherwise
+
"""
- cmd = utils.ShellQuoteArgs(['xm', 'list', name]) + ' >/dev/null'
- ret = StartSSH(node['primary'], cmd).wait()
+ master = qa_config.GetMasterNode()
+
+ # List only the status column for this instance; grep exits with 0 only
+ # if that output contains "running"
+ cmd = (utils.ShellQuoteArgs(["gnt-instance", "list", "-o", "status", name]) +
+ ' | grep running')
+ ret = StartSSH(master["primary"], cmd).wait()
return ret == 0
-def _XmShutdownInstance(node, name):
- """Shuts down instance using "xm" and waits for completion.
+def _ShutdownInstance(name):
+ """Shuts down instance without recording state and waits for completion.
- Args:
- node: Node the instance runs on
- name: Full name of Xen instance
- """
- master = qa_config.GetMasterNode()
+ @param name: full name of the instance
+ @raise qa_error.Error: if the instance is still reported as running
+ after the shutdown command
- cmd = ['xm', 'shutdown', name]
- AssertEqual(StartSSH(node['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ """
+ # NOTE(review): the status is checked only once below, without polling;
+ # presumably "gnt-instance shutdown" returns only after the instance is
+ # down -- confirm
+ AssertCommand(["gnt-instance", "shutdown", "--no-remember", name])
- # Wait up to a minute
- end = time.time() + 60
- while time.time() <= end:
- if not _InstanceRunning(node, name):
- break
- time.sleep(5)
- else:
- raise qa_error.Error("xm shutdown failed")
+ if _InstanceRunning(name):
+ raise qa_error.Error("instance shutdown failed")
-def _ResetWatcherDaemon(node):
+def _ResetWatcherDaemon():
"""Removes the watcher daemon's state file.
- Args:
- node: Node to be reset
"""
- cmd = ['rm', '-f', constants.WATCHER_STATEFILE]
- AssertEqual(StartSSH(node['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ # The group state file name is a format string; fill it with a "*-*-*-*"
+ # wildcard and run "rm" through bash so the shell expands the pattern
+ AssertCommand([
+ "bash", "-c",
+ "rm -vf %s" % (constants.WATCHER_GROUP_STATE_FILE % "*-*-*-*")
+ ])
-def PrintCronWarning():
- """Shows a warning about the required cron job.
+def _RunWatcherDaemon():
+ """Runs the ganeti-watcher daemon on the master node.
"""
- print
- print qa_utils.FormatWarning("The following tests require the cron script "
- "for ganeti-watcher to be set up.")
+ # NOTE(review): the flags presumably enable debug output, make the watcher
+ # run even while paused and wait for its child processes -- confirm
+ # against the ganeti-watcher help text
+ AssertCommand(["ganeti-watcher", "-d", "--ignore-pause", "--wait-children"])
-def TestInstanceAutomaticRestart(node, instance):
- """Test automatic restart of instance by ganeti-watcher.
+def TestPauseWatcher():
+ """Tests and pauses the watcher.
- Note: takes up to 6 minutes to complete.
"""
master = qa_config.GetMasterNode()
- inst_name = qa_utils.ResolveInstanceName(instance)
- _ResetWatcherDaemon(node)
- _XmShutdownInstance(node, inst_name)
+ # Pause the watcher; "4h" is presumably a four-hour duration
+ AssertCommand(["gnt-cluster", "watcher", "pause", "4h"])
- # Give it a bit more than five minutes to start again
- restart_at = time.time() + 330
+ # The info output fetched from the master node must now say "is paused"
+ cmd = ["gnt-cluster", "watcher", "info"]
+ output = GetCommandOutput(master["primary"],
+ utils.ShellQuoteArgs(cmd))
+ AssertMatch(output, r"^.*\bis paused\b.*")
- # Wait until it's running again
- while time.time() <= restart_at:
- if _InstanceRunning(node, inst_name):
- break
- time.sleep(15)
- else:
- raise qa_error.Error("Daemon didn't restart instance in time")
- cmd = ['gnt-instance', 'info', inst_name]
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+def TestResumeWatcher():
+ """Tests and unpauses the watcher.
+ """
+ master = qa_config.GetMasterNode()
-def TestInstanceConsecutiveFailures(node, instance):
- """Test five consecutive instance failures.
+ # Resume the watcher
+ AssertCommand(["gnt-cluster", "watcher", "continue"])
+
+ # The info output fetched from the master node must now say "is not paused"
+ cmd = ["gnt-cluster", "watcher", "info"]
+ output = GetCommandOutput(master["primary"],
+ utils.ShellQuoteArgs(cmd))
+ AssertMatch(output, r"^.*\bis not paused\b.*")
+
+
+def TestInstanceAutomaticRestart(instance):
+ """Test automatic restart of instance by ganeti-watcher.
+
+ @param instance: mapping whose "name" entry is passed to
+ qa_utils.ResolveInstanceName to obtain the instance name
+ @raise qa_error.Error: if the instance is not running after the watcher
+ has been run
+
- Note: takes at least 35 minutes to complete.
"""
- master = qa_config.GetMasterNode()
- inst_name = qa_utils.ResolveInstanceName(instance)
+ inst_name = qa_utils.ResolveInstanceName(instance["name"])
+
+ # Start from a clean watcher state with the instance shut down
+ _ResetWatcherDaemon()
+ _ShutdownInstance(inst_name)
+
+ _RunWatcherDaemon()
+ # NOTE(review): fixed five-second pause before checking the status;
+ # presumably enough for the restart to become visible -- confirm
+ time.sleep(5)
- _ResetWatcherDaemon(node)
- _XmShutdownInstance(node, inst_name)
+ if not _InstanceRunning(inst_name):
+ raise qa_error.Error("Daemon didn't restart instance")
- # Do shutdowns for 30 minutes
- finished_at = time.time() + (35 * 60)
+ AssertCommand(["gnt-instance", "info", inst_name])
- while time.time() <= finished_at:
- if _InstanceRunning(node, inst_name):
- _XmShutdownInstance(node, inst_name)
- time.sleep(30)
- # Check for some time whether the instance doesn't start again
- check_until = time.time() + 330
- while time.time() <= check_until:
- if _InstanceRunning(node, inst_name):
- raise qa_error.Error("Instance started when it shouldn't")
- time.sleep(30)
+def TestInstanceConsecutiveFailures(instance):
+ """Test five consecutive instance failures.
+
+ @param instance: mapping whose "name" entry is passed to
+ qa_utils.ResolveInstanceName to obtain the instance name
+ @raise qa_error.Error: if the instance's running state after a watcher
+ run does not match the expectation for that round
+
+ """
+ inst_name = qa_utils.ResolveInstanceName(instance["name"])
+
+ _ResetWatcherDaemon()
+
+ # Six rounds of shutdown + watcher run: the instance is expected to be
+ # running again after each of the first five, but not after the sixth
+ for should_start in ([True] * 5) + [False]:
+ _ShutdownInstance(inst_name)
+ _RunWatcherDaemon()
+ time.sleep(5)
+
+ if bool(_InstanceRunning(inst_name)) != should_start:
+ if should_start:
+ msg = "Instance not started when it should"
+ else:
+ msg = "Instance started when it shouldn't"
+ raise qa_error.Error(msg)
- cmd = ['gnt-instance', 'info', inst_name]
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ AssertCommand(["gnt-instance", "info", inst_name])