QA: Added function to check cluster-verify result
[ganeti-local] / qa / ganeti-qa.py
index 9f12d37..26c2407 100755 (executable)
@@ -34,6 +34,7 @@ import qa_cluster
 import qa_config
 import qa_daemon
 import qa_env
+import qa_error
 import qa_group
 import qa_instance
 import qa_node
@@ -44,10 +45,11 @@ import qa_tags
 import qa_utils
 
 from ganeti import utils
-from ganeti import rapi
+from ganeti import rapi # pylint: disable=W0611
 from ganeti import constants
 
 import ganeti.rapi.client # pylint: disable=W0611
+from ganeti.rapi.client import UsesRapiClient
 
 
 def _FormatHeader(line, end=72):
@@ -72,7 +74,7 @@ def _DescriptionOf(fn):
   return desc.rstrip(".")
 
 
-def RunTest(fn, *args):
+def RunTest(fn, *args, **kwargs):
   """Runs a test after printing a header.
 
   """
@@ -85,7 +87,7 @@ def RunTest(fn, *args):
   print _FormatHeader("%s start %s" % (tstart, desc))
 
   try:
-    retval = fn(*args)
+    retval = fn(*args, **kwargs)
     return retval
   finally:
     tstop = datetime.datetime.now()
@@ -93,7 +95,7 @@ def RunTest(fn, *args):
     print _FormatHeader("%s time=%s %s" % (tstop, tdelta, desc))
 
 
-def RunTestIf(testnames, fn, *args):
+def RunTestIf(testnames, fn, *args, **kwargs):
   """Runs a test conditionally.
 
   @param testnames: either a single test name in the configuration
@@ -101,7 +103,7 @@ def RunTestIf(testnames, fn, *args):
 
   """
   if qa_config.TestEnabled(testnames):
-    RunTest(fn, *args)
+    RunTest(fn, *args, **kwargs)
   else:
     tstart = datetime.datetime.now()
     desc = _DescriptionOf(fn)
@@ -252,7 +254,12 @@ def RunCommonInstanceTests(instance):
   RunTestIf(["instance-console", "rapi"],
             qa_rapi.TestRapiInstanceConsole, instance)
 
-  DOWN_TESTS = ["instance-reinstall", "instance-rename", "instance-grow-disk"]
+  DOWN_TESTS = qa_config.Either([
+    "instance-reinstall",
+    "instance-rename",
+    "instance-grow-disk",
+    ])
+
   # shutdown instance for any 'down' tests
   RunTestIf(DOWN_TESTS, qa_instance.TestInstanceShutdown, instance)
 
@@ -260,25 +267,25 @@ def RunCommonInstanceTests(instance):
   RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
   RunTestIf(["instance-reinstall", "rapi"],
             qa_rapi.TestRapiInstanceReinstall, instance)
-  # RAPI reinstall will leave the instance up by default, so we have
-  # to stop it again
-  RunTestIf(["instance-reinstall", "rapi"],
-            qa_rapi.TestRapiInstanceShutdown, instance)
 
   if qa_config.TestEnabled("instance-rename"):
-    rename_source = instance["name"]
-    rename_target = qa_config.get("rename", None)
-    # perform instance rename to the same name
-    RunTest(qa_instance.TestInstanceRenameAndBack,
-            rename_source, rename_source)
-    RunTestIf("rapi", qa_rapi.TestRapiInstanceRenameAndBack,
-              rename_source, rename_source)
-    if rename_target is not None:
-      # perform instance rename to a different name, if we have one configured
+    tgt_instance = qa_config.AcquireInstance()
+    try:
+      rename_source = instance["name"]
+      rename_target = tgt_instance["name"]
+      # perform instance rename to the same name
       RunTest(qa_instance.TestInstanceRenameAndBack,
-              rename_source, rename_target)
+              rename_source, rename_source)
       RunTestIf("rapi", qa_rapi.TestRapiInstanceRenameAndBack,
+                rename_source, rename_source)
+      if rename_target is not None:
+        # perform instance rename to a different name, if one is available
+        RunTest(qa_instance.TestInstanceRenameAndBack,
                 rename_source, rename_target)
+        RunTestIf("rapi", qa_rapi.TestRapiInstanceRenameAndBack,
+                  rename_source, rename_target)
+    finally:
+      qa_config.ReleaseInstance(tgt_instance)
 
   RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)
 
@@ -349,8 +356,10 @@ def RunExportImportTests(instance, pnode, snode):
       if qa_config.TestEnabled("instance-import"):
         newinst = qa_config.AcquireInstance()
         try:
-          RunTest(qa_instance.TestInstanceImport, pnode, newinst,
+          RunTest(qa_instance.TestInstanceImport, newinst, pnode,
                   expnode, name)
+          # Check if starting the instance works
+          RunTest(qa_instance.TestInstanceStartup, newinst)
           RunTest(qa_instance.TestInstanceRemove, newinst)
         finally:
           qa_config.ReleaseInstance(newinst)
@@ -388,6 +397,19 @@ def RunDaemonTests(instance):
   RunTest(qa_daemon.TestResumeWatcher)
 
 
+def RunSingleHomedHardwareFailureTests(instance, pnode):
+  """Test hardware failure recovery for single-homed instances.
+
+  """
+  if qa_config.TestEnabled("instance-recreate-disks"):
+    othernode = qa_config.AcquireNode(exclude=[pnode])
+    try:
+      RunTest(qa_instance.TestRecreateDisks,
+              instance, pnode, None, [othernode])
+    finally:
+      qa_config.ReleaseNode(othernode)
+
+
 def RunHardwareFailureTests(instance, pnode, snode):
   """Test cluster internal hardware failure recovery.
 
@@ -409,6 +431,21 @@ def RunHardwareFailureTests(instance, pnode, snode):
     finally:
       qa_config.ReleaseNode(othernode)
 
+  if qa_config.TestEnabled("instance-recreate-disks"):
+    othernode1 = qa_config.AcquireNode(exclude=[pnode, snode])
+    try:
+      othernode2 = qa_config.AcquireNode(exclude=[pnode, snode, othernode1])
+    except qa_error.OutOfNodesError:
+      # Let's reuse one of the nodes if the cluster is not big enough
+      othernode2 = pnode
+    try:
+      RunTest(qa_instance.TestRecreateDisks,
+              instance, pnode, snode, [othernode1, othernode2])
+    finally:
+      qa_config.ReleaseNode(othernode1)
+      if othernode2 != pnode:
+        qa_config.ReleaseNode(othernode2)
+
   RunTestIf("node-evacuate", qa_node.TestNodeEvacuate, pnode, snode)
 
   RunTestIf("node-failover", qa_node.TestNodeFailover, pnode, snode)
@@ -442,6 +479,8 @@ def RunQa():
   RunGroupListTests()
   RunGroupRwTests()
 
+  # The master shouldn't be re-added or put offline; "delay" needs a non-master
+  # node to test
   pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
   try:
     RunTestIf("node-readd", qa_node.TestNodeReadd, pnode)
@@ -461,7 +500,8 @@ def RunQa():
         for use_client in [True, False]:
           rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
                                   use_client)
-          RunCommonInstanceTests(rapi_instance)
+          if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
+            RunCommonInstanceTests(rapi_instance)
           RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
           del rapi_instance
 
@@ -473,6 +513,7 @@ def RunQa():
       RunExportImportTests(instance, pnode, None)
       RunDaemonTests(instance)
       RunRepairDiskSizes()
+      RunSingleHomedHardwareFailureTests(instance, pnode)
       RunTest(qa_instance.TestInstanceRemove, instance)
       del instance
 
@@ -486,11 +527,13 @@ def RunQa():
         snode = qa_config.AcquireNode(exclude=pnode)
         try:
           instance = RunTest(func, pnode, snode)
+          RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, pnode)
+          RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, snode)
           RunCommonInstanceTests(instance)
           RunGroupListTests()
-          RunTest(qa_group.TestAssignNodesIncludingSplit,
-                  constants.INITIAL_NODE_GROUP_NAME,
-                  pnode["primary"], snode["primary"])
+          RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
+                    constants.INITIAL_NODE_GROUP_NAME,
+                    pnode["primary"], snode["primary"])
           if qa_config.TestEnabled("instance-convert-disk"):
             RunTest(qa_instance.TestInstanceShutdown, instance)
             RunTest(qa_instance.TestInstanceConvertDisk, instance, snode)
@@ -503,6 +546,29 @@ def RunQa():
         finally:
           qa_config.ReleaseNode(snode)
 
+  finally:
+    qa_config.ReleaseNode(pnode)
+
+  # Test removing an instance whose DRBD secondary node is offline
+  if qa_config.TestEnabled("instance-remove-drbd-offline"):
+    # Make sure the master is not put offline
+    snode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
+    try:
+      pnode = qa_config.AcquireNode(exclude=snode)
+      try:
+        instance = qa_instance.TestInstanceAddWithDrbdDisk(pnode, snode)
+        qa_node.MakeNodeOffline(snode, "yes")
+        try:
+          RunTest(qa_instance.TestInstanceRemove, instance)
+        finally:
+          qa_node.MakeNodeOffline(snode, "no")
+      finally:
+        qa_config.ReleaseNode(pnode)
+    finally:
+      qa_config.ReleaseNode(snode)
+
+  pnode = qa_config.AcquireNode()
+  try:
     if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
       for shutdown in [False, True]:
         instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode)
@@ -526,15 +592,15 @@ def RunQa():
   RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
 
 
-@rapi.client.UsesRapiClient
+@UsesRapiClient
 def main():
   """Main program.
 
   """
   parser = optparse.OptionParser(usage="%prog [options] <config-file>")
   parser.add_option("--yes-do-it", dest="yes_do_it",
-      action="store_true",
-      help="Really execute the tests")
+                    action="store_true",
+                    help="Really execute the tests")
   (qa_config.options, args) = parser.parse_args()
 
   if len(args) == 1:
@@ -550,7 +616,12 @@ def main():
 
   qa_config.Load(config_file)
 
-  qa_utils.StartMultiplexer(qa_config.GetMasterNode()["primary"])
+  primary = qa_config.GetMasterNode()["primary"]
+  qa_utils.StartMultiplexer(primary)
+  print ("SSH command for primary node: %s" %
+         utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, "")))
+  print ("SSH command for other nodes: %s" %
+         utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", "")))
   try:
     RunQa()
   finally: