Fix validation of vgname in OpClusterSetParams
[ganeti-local] / qa / qa_daemon.py
index 413e5d7..6b0ce93 100644 (file)
@@ -1,4 +1,7 @@
-# Copyright (C) 2007 Google Inc.
+#
+#
+
+# Copyright (C) 2007, 2008, 2009, 2010, 2011 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -29,117 +32,116 @@ import qa_config
 import qa_utils
 import qa_error
 
-from qa_utils import AssertEqual, StartSSH
+from qa_utils import AssertMatch, AssertCommand, StartSSH, GetCommandOutput
 
 
-def _InstanceRunning(node, name):
+def _InstanceRunning(name):
   """Checks whether an instance is running.
 
-  Args:
-    node: Node the instance runs on
-    name: Full name of Xen instance
+  @param name: full name of the instance
+
   """
-  cmd = utils.ShellQuoteArgs(['xm', 'list', name]) + ' >/dev/null'
-  ret = StartSSH(node['primary'], cmd).wait()
+  master = qa_config.GetMasterNode()
+
+  cmd = (utils.ShellQuoteArgs(["gnt-instance", "list", "-o", "status", name]) +
+         ' | grep running')
+  ret = StartSSH(master["primary"], cmd).wait()
   return ret == 0
 
 
-def _XmShutdownInstance(node, name):
-  """Shuts down instance using "xm" and waits for completion.
+def _ShutdownInstance(name):
+  """Shuts down instance without recording state and waits for completion.
 
-  Args:
-    node: Node the instance runs on
-    name: Full name of Xen instance
-  """
-  master = qa_config.GetMasterNode()
+  @param name: full name of the instance
 
-  cmd = ['xm', 'shutdown', name]
-  AssertEqual(StartSSH(node['primary'],
-                       utils.ShellQuoteArgs(cmd)).wait(), 0)
+  """
+  AssertCommand(["gnt-instance", "shutdown", "--no-remember", name])
 
-  # Wait up to a minute
-  end = time.time() + 60
-  while time.time() <= end:
-    if not _InstanceRunning(node, name):
-      break
-    time.sleep(5)
-  else:
-    raise qa_error.Error("xm shutdown failed")
+  if _InstanceRunning(name):
+    raise qa_error.Error("instance shutdown failed")
 
 
-def _ResetWatcherDaemon(node):
+def _ResetWatcherDaemon():
   """Removes the watcher daemon's state file.
 
-  Args:
-    node: Node to be reset
   """
-  cmd = ['rm', '-f', constants.WATCHER_STATEFILE]
-  AssertEqual(StartSSH(node['primary'],
-                       utils.ShellQuoteArgs(cmd)).wait(), 0)
+  AssertCommand([
+    "bash", "-c",
+    "rm -vf %s" % (constants.WATCHER_GROUP_STATE_FILE % "*-*-*-*")
+    ])
 
 
-def PrintCronWarning():
-  """Shows a warning about the required cron job.
+def _RunWatcherDaemon():
+  """Runs the ganeti-watcher daemon on the master node.
 
   """
-  print
-  print qa_utils.FormatWarning("The following tests require the cron script "
-                               "for ganeti-watcher to be set up.")
+  AssertCommand(["ganeti-watcher", "-d", "--ignore-pause", "--wait-children"])
 
 
-def TestInstanceAutomaticRestart(node, instance):
-  """Test automatic restart of instance by ganeti-watcher.
+def TestPauseWatcher():
+  """Tests and pauses the watcher.
 
-  Note: takes up to 6 minutes to complete.
   """
   master = qa_config.GetMasterNode()
-  inst_name = qa_utils.ResolveInstanceName(instance)
 
-  _ResetWatcherDaemon(node)
-  _XmShutdownInstance(node, inst_name)
+  AssertCommand(["gnt-cluster", "watcher", "pause", "4h"])
 
-  # Give it a bit more than five minutes to start again
-  restart_at = time.time() + 330
+  cmd = ["gnt-cluster", "watcher", "info"]
+  output = GetCommandOutput(master["primary"],
+                            utils.ShellQuoteArgs(cmd))
+  AssertMatch(output, r"^.*\bis paused\b.*")
 
-  # Wait until it's running again
-  while time.time() <= restart_at:
-    if _InstanceRunning(node, inst_name):
-      break
-    time.sleep(15)
-  else:
-    raise qa_error.Error("Daemon didn't restart instance in time")
 
-  cmd = ['gnt-instance', 'info', inst_name]
-  AssertEqual(StartSSH(master['primary'],
-                       utils.ShellQuoteArgs(cmd)).wait(), 0)
+def TestResumeWatcher():
+  """Tests and unpauses the watcher.
 
+  """
+  master = qa_config.GetMasterNode()
 
-def TestInstanceConsecutiveFailures(node, instance):
-  """Test five consecutive instance failures.
+  AssertCommand(["gnt-cluster", "watcher", "continue"])
+
+  cmd = ["gnt-cluster", "watcher", "info"]
+  output = GetCommandOutput(master["primary"],
+                            utils.ShellQuoteArgs(cmd))
+  AssertMatch(output, r"^.*\bis not paused\b.*")
+
+
+def TestInstanceAutomaticRestart(instance):
+  """Test automatic restart of instance by ganeti-watcher.
 
-  Note: takes at least 35 minutes to complete.
   """
-  master = qa_config.GetMasterNode()
-  inst_name = qa_utils.ResolveInstanceName(instance)
+  inst_name = qa_utils.ResolveInstanceName(instance["name"])
+
+  _ResetWatcherDaemon()
+  _ShutdownInstance(inst_name)
+
+  _RunWatcherDaemon()
+  time.sleep(5)
 
-  _ResetWatcherDaemon(node)
-  _XmShutdownInstance(node, inst_name)
+  if not _InstanceRunning(inst_name):
+    raise qa_error.Error("Daemon didn't restart instance")
 
-  # Do shutdowns for 30 minutes
-  finished_at = time.time() + (35 * 60)
+  AssertCommand(["gnt-instance", "info", inst_name])
 
-  while time.time() <= finished_at:
-    if _InstanceRunning(node, inst_name):
-      _XmShutdownInstance(node, inst_name)
-    time.sleep(30)
 
-  # Check for some time whether the instance doesn't start again
-  check_until = time.time() + 330
-  while time.time() <= check_until:
-    if _InstanceRunning(node, inst_name):
-      raise qa_error.Error("Instance started when it shouldn't")
-    time.sleep(30)
+def TestInstanceConsecutiveFailures(instance):
+  """Test that the watcher gives up after five consecutive failures.
+
+  """
+  inst_name = qa_utils.ResolveInstanceName(instance["name"])
+
+  _ResetWatcherDaemon()
+
+  for should_start in ([True] * 5) + [False]:
+    _ShutdownInstance(inst_name)
+    _RunWatcherDaemon()
+    time.sleep(5)
+
+    if bool(_InstanceRunning(inst_name)) != should_start:
+      if should_start:
+        msg = "Instance not started when it should"
+      else:
+        msg = "Instance started when it shouldn't"
+      raise qa_error.Error(msg)
 
-  cmd = ['gnt-instance', 'info', inst_name]
-  AssertEqual(StartSSH(master['primary'],
-                       utils.ShellQuoteArgs(cmd)).wait(), 0)
+  AssertCommand(["gnt-instance", "info", inst_name])