Run ganeti-watcher script from QA code instead of cron.
author Michael Hanselmann <hansmi@google.com>
Tue, 13 Nov 2007 19:31:54 +0000 (19:31 +0000)
committer Michael Hanselmann <hansmi@google.com>
Tue, 13 Nov 2007 19:31:54 +0000 (19:31 +0000)
This makes the tests much more reliable because it avoids race conditions.
It also helps to speed them up a lot.

Reviewed-by: iustinp

qa/qa-sample.yaml
qa/qa_daemon.py

index 75bbd38..0285d18 100644 (file)
@@ -55,18 +55,14 @@ tests:
   instance-import: True
   instance-reinstall: True
   instance-shutdown: True
+  instance-automatic-restart: False
+  instance-consecutive-failures: False
 
   # Make sure not to include the disk(s) required for Dom0 to be included in
   # the volume group used for instances. Otherwise the whole system may stop
   # working until restarted.
   instance-disk-failure: False
 
-  # This test takes up to 6 minutes to complete
-  instance-automatic-restart: False
-
-  # This test takes at least 35 minutes to complete
-  instance-consecutive-failures: False
-
 # Other settings
 options:
   burnin-instances: 2
index 413e5d7..6d0d706 100644 (file)
@@ -67,47 +67,55 @@ def _XmShutdownInstance(node, name):
     raise qa_error.Error("xm shutdown failed")
 
 
-def _ResetWatcherDaemon(node):
+def _ResetWatcherDaemon():
   """Removes the watcher daemon's state file.
 
   Args:
     node: Node to be reset
   """
+  master = qa_config.GetMasterNode()
+
   cmd = ['rm', '-f', constants.WATCHER_STATEFILE]
-  AssertEqual(StartSSH(node['primary'],
+  AssertEqual(StartSSH(master['primary'],
+                       utils.ShellQuoteArgs(cmd)).wait(), 0)
+
+
+def _RunWatcherDaemon():
+  """Runs the ganeti-watcher daemon on the master node.
+
+  """
+  master = qa_config.GetMasterNode()
+
+  cmd = ['ganeti-watcher', '-d']
+  AssertEqual(StartSSH(master['primary'],
                        utils.ShellQuoteArgs(cmd)).wait(), 0)
 
 
 def PrintCronWarning():
-  """Shows a warning about the required cron job.
+  """Shows a warning about the cron job.
 
   """
+  msg = ("For the following tests it's recommended to turn off the "
+         "ganeti-watcher cronjob.")
   print
-  print qa_utils.FormatWarning("The following tests require the cron script "
-                               "for ganeti-watcher to be set up.")
+  print qa_utils.FormatWarning(msg)
 
 
 def TestInstanceAutomaticRestart(node, instance):
   """Test automatic restart of instance by ganeti-watcher.
 
-  Note: takes up to 6 minutes to complete.
   """
   master = qa_config.GetMasterNode()
   inst_name = qa_utils.ResolveInstanceName(instance)
 
-  _ResetWatcherDaemon(node)
+  _ResetWatcherDaemon()
   _XmShutdownInstance(node, inst_name)
 
-  # Give it a bit more than five minutes to start again
-  restart_at = time.time() + 330
+  _RunWatcherDaemon()
+  time.sleep(5)
 
-  # Wait until it's running again
-  while time.time() <= restart_at:
-    if _InstanceRunning(node, inst_name):
-      break
-    time.sleep(15)
-  else:
-    raise qa_error.Error("Daemon didn't restart instance in time")
+  if not _InstanceRunning(node, inst_name):
+    raise qa_error.Error("Daemon didn't restart instance")
 
   cmd = ['gnt-instance', 'info', inst_name]
   AssertEqual(StartSSH(master['primary'],
@@ -117,28 +125,23 @@ def TestInstanceAutomaticRestart(node, instance):
 def TestInstanceConsecutiveFailures(node, instance):
   """Test five consecutive instance failures.
 
-  Note: takes at least 35 minutes to complete.
   """
   master = qa_config.GetMasterNode()
   inst_name = qa_utils.ResolveInstanceName(instance)
 
-  _ResetWatcherDaemon(node)
-  _XmShutdownInstance(node, inst_name)
-
-  # Do shutdowns for 30 minutes
-  finished_at = time.time() + (35 * 60)
+  _ResetWatcherDaemon()
 
-  while time.time() <= finished_at:
-    if _InstanceRunning(node, inst_name):
-      _XmShutdownInstance(node, inst_name)
-    time.sleep(30)
+  for should_start in ([True] * 5) + [False]:
+    _XmShutdownInstance(node, inst_name)
+    _RunWatcherDaemon()
+    time.sleep(5)
 
-  # Check for some time whether the instance doesn't start again
-  check_until = time.time() + 330
-  while time.time() <= check_until:
-    if _InstanceRunning(node, inst_name):
-      raise qa_error.Error("Instance started when it shouldn't")
-    time.sleep(30)
+    if bool(_InstanceRunning(node, inst_name)) != should_start:
+      if should_start:
+        msg = "Instance not started when it should"
+      else:
+        msg = "Instance started when it shouldn't"
+      raise qa_error.Error(msg)
 
   cmd = ['gnt-instance', 'info', inst_name]
   AssertEqual(StartSSH(master['primary'],