QA: Added function to check cluster-verify result
[ganeti-local] / qa / ganeti-qa.py
index ecd7ee4..26c2407 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/python -u
 #
 
-# Copyright (C) 2007, 2008, 2009, 2010, 2011 Google Inc.
+# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -23,7 +23,7 @@
 
 """
 
-# pylint: disable-msg=C0103
+# pylint: disable=C0103
 # due to invalid name
 
 import sys
@@ -34,19 +34,22 @@ import qa_cluster
 import qa_config
 import qa_daemon
 import qa_env
+import qa_error
 import qa_group
 import qa_instance
 import qa_node
 import qa_os
+import qa_job
 import qa_rapi
 import qa_tags
 import qa_utils
 
 from ganeti import utils
-from ganeti import rapi
+from ganeti import rapi # pylint: disable=W0611
 from ganeti import constants
 
-import ganeti.rapi.client # pylint: disable-msg=W0611
+import ganeti.rapi.client # pylint: disable=W0611
+from ganeti.rapi.client import UsesRapiClient
 
 
 def _FormatHeader(line, end=72):
@@ -54,7 +57,7 @@ def _FormatHeader(line, end=72):
 
   """
   line = "---- " + line + " "
-  line += "-" * (end-len(line))
+  line += "-" * (end - len(line))
   line = line.rstrip()
   return line
 
@@ -71,7 +74,7 @@ def _DescriptionOf(fn):
   return desc.rstrip(".")
 
 
-def RunTest(fn, *args):
+def RunTest(fn, *args, **kwargs):
   """Runs a test after printing a header.
 
   """
@@ -84,7 +87,7 @@ def RunTest(fn, *args):
   print _FormatHeader("%s start %s" % (tstart, desc))
 
   try:
-    retval = fn(*args)
+    retval = fn(*args, **kwargs)
     return retval
   finally:
     tstop = datetime.datetime.now()
@@ -92,7 +95,7 @@ def RunTest(fn, *args):
     print _FormatHeader("%s time=%s %s" % (tstop, tdelta, desc))
 
 
-def RunTestIf(testnames, fn, *args):
+def RunTestIf(testnames, fn, *args, **kwargs):
   """Runs a test conditionally.
 
   @param testnames: either a single test name in the configuration
@@ -100,7 +103,7 @@ def RunTestIf(testnames, fn, *args):
 
   """
   if qa_config.TestEnabled(testnames):
-    RunTest(fn, *args)
+    RunTest(fn, *args, **kwargs)
   else:
     tstart = datetime.datetime.now()
     desc = _DescriptionOf(fn)
@@ -130,6 +133,7 @@ def SetupCluster(rapi_user, rapi_secret):
   # Test on empty cluster
   RunTestIf("node-list", qa_node.TestNodeList)
   RunTestIf("instance-list", qa_instance.TestInstanceList)
+  RunTestIf("job-list", qa_job.TestJobList)
 
   RunTestIf("create-cluster", qa_node.TestNodeAddAll)
   if not qa_config.TestEnabled("create-cluster"):
@@ -146,6 +150,8 @@ def SetupCluster(rapi_user, rapi_secret):
   # Test listing fields
   RunTestIf("node-list", qa_node.TestNodeListFields)
   RunTestIf("instance-list", qa_instance.TestInstanceListFields)
+  RunTestIf("job-list", qa_job.TestJobListFields)
+  RunTestIf("instance-export", qa_instance.TestBackupListFields)
 
   RunTestIf("node-info", qa_node.TestNodeInfo)
 
@@ -155,15 +161,19 @@ def RunClusterTests():
 
   """
   for test, fn in [
+    ("create-cluster", qa_cluster.TestClusterInitDisk),
     ("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto),
     ("cluster-verify", qa_cluster.TestClusterVerify),
     ("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs),
     # TODO: add more cluster modify tests
+    ("cluster-modify", qa_cluster.TestClusterModifyEmpty),
     ("cluster-modify", qa_cluster.TestClusterModifyBe),
+    ("cluster-modify", qa_cluster.TestClusterModifyDisk),
     ("cluster-rename", qa_cluster.TestClusterRename),
     ("cluster-info", qa_cluster.TestClusterVersion),
     ("cluster-info", qa_cluster.TestClusterInfo),
     ("cluster-info", qa_cluster.TestClusterGetmaster),
+    ("cluster-redist-conf", qa_cluster.TestClusterRedistConf),
     ("cluster-copyfile", qa_cluster.TestClusterCopyfile),
     ("cluster-command", qa_cluster.TestClusterCommand),
     ("cluster-burnin", qa_cluster.TestClusterBurnin),
@@ -178,6 +188,13 @@ def RunClusterTests():
     RunTestIf(test, fn)
 
 
+def RunRepairDiskSizes():
+  """Runs the repair disk-sizes test if "cluster-repair-disk-sizes" is enabled.
+
+  """
+  RunTestIf("cluster-repair-disk-sizes", qa_cluster.TestClusterRepairDiskSizes)
+
+
 def RunOsTests():
   """Runs all tests related to gnt-os.
 
@@ -215,8 +232,16 @@ def RunCommonInstanceTests(instance):
   RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance)
   RunTestIf(["instance-shutdown", "instance-console", "rapi"],
             qa_rapi.TestRapiStoppedInstanceConsole, instance)
+  RunTestIf(["instance-shutdown", "instance-modify"],
+            qa_instance.TestInstanceStoppedModify, instance)
   RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)
 
+  # Test shutdown/start via RAPI
+  RunTestIf(["instance-shutdown", "rapi"],
+            qa_rapi.TestRapiInstanceShutdown, instance)
+  RunTestIf(["instance-shutdown", "rapi"],
+            qa_rapi.TestRapiInstanceStartup, instance)
+
   RunTestIf("instance-list", qa_instance.TestInstanceList)
 
   RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)
@@ -229,31 +254,45 @@ def RunCommonInstanceTests(instance):
   RunTestIf(["instance-console", "rapi"],
             qa_rapi.TestRapiInstanceConsole, instance)
 
-  RunTestIf("instance-reinstall", qa_instance.TestInstanceShutdown, instance)
+  DOWN_TESTS = qa_config.Either([
+    "instance-reinstall",
+    "instance-rename",
+    "instance-grow-disk",
+    ])
+
+  # shutdown instance for any 'down' tests
+  RunTestIf(DOWN_TESTS, qa_instance.TestInstanceShutdown, instance)
+
+  # now run the 'down' state tests
   RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
   RunTestIf(["instance-reinstall", "rapi"],
             qa_rapi.TestRapiInstanceReinstall, instance)
-  RunTestIf("instance-reinstall", qa_instance.TestInstanceStartup, instance)
 
-  RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)
-
-  if qa_config.TestEnabled('instance-rename'):
-    rename_source = instance["name"]
-    rename_target = qa_config.get("rename", None)
-    RunTest(qa_instance.TestInstanceShutdown, instance)
-    # perform instance rename to the same name
-    RunTest(qa_instance.TestInstanceRename, rename_source, rename_source)
-    RunTestIf("rapi", qa_rapi.TestRapiInstanceRename,
+  if qa_config.TestEnabled("instance-rename"):
+    tgt_instance = qa_config.AcquireInstance()
+    try:
+      rename_source = instance["name"]
+      rename_target = tgt_instance["name"]
+      # perform instance rename to the same name
+      RunTest(qa_instance.TestInstanceRenameAndBack,
               rename_source, rename_source)
-    if rename_target is not None:
-      # perform instance rename to a different name, if we have one configured
-      RunTest(qa_instance.TestInstanceRename, rename_source, rename_target)
-      RunTest(qa_instance.TestInstanceRename, rename_target, rename_source)
-      RunTestIf("rapi", qa_rapi.TestRapiInstanceRename,
+      RunTestIf("rapi", qa_rapi.TestRapiInstanceRenameAndBack,
+                rename_source, rename_source)
+      if rename_target is not None:
+        # perform instance rename to the acquired instance's name and back
+        RunTest(qa_instance.TestInstanceRenameAndBack,
                 rename_source, rename_target)
-      RunTestIf("rapi", qa_rapi.TestRapiInstanceRename,
-                rename_target, rename_source)
-    RunTest(qa_instance.TestInstanceStartup, instance)
+        RunTestIf("rapi", qa_rapi.TestRapiInstanceRenameAndBack,
+                  rename_source, rename_target)
+    finally:
+      qa_config.ReleaseInstance(tgt_instance)
+
+  RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)
+
+  # and now start the instance again
+  RunTestIf(DOWN_TESTS, qa_instance.TestInstanceStartup, instance)
+
+  RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)
 
   RunTestIf("tags", qa_tags.TestInstanceTags, instance)
 
@@ -264,6 +303,9 @@ def RunCommonInstanceTests(instance):
   # Lists instances, too
   RunTestIf("node-list", qa_node.TestNodeList)
 
+  # Some jobs have been run, let's test listing them
+  RunTestIf("job-list", qa_job.TestJobList)
+
 
 def RunCommonNodeTests():
   """Run a few common node tests.
@@ -290,6 +332,8 @@ def RunGroupRwTests():
   RunTestIf("group-rwops", qa_group.TestGroupAddWithOptions)
   RunTestIf("group-rwops", qa_group.TestGroupModify)
   RunTestIf(["group-rwops", "rapi"], qa_rapi.TestRapiNodeGroups)
+  RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags,
+            qa_group.GetDefaultGroup())
 
 
 def RunExportImportTests(instance, pnode, snode):
@@ -300,7 +344,7 @@ def RunExportImportTests(instance, pnode, snode):
       otherwise None
 
   """
-  if qa_config.TestEnabled('instance-export'):
+  if qa_config.TestEnabled("instance-export"):
     RunTest(qa_instance.TestInstanceExportNoTarget, instance)
 
     expnode = qa_config.AcquireNode(exclude=pnode)
@@ -309,11 +353,13 @@ def RunExportImportTests(instance, pnode, snode):
 
       RunTest(qa_instance.TestBackupList, expnode)
 
-      if qa_config.TestEnabled('instance-import'):
+      if qa_config.TestEnabled("instance-import"):
         newinst = qa_config.AcquireInstance()
         try:
-          RunTest(qa_instance.TestInstanceImport, pnode, newinst,
+          RunTest(qa_instance.TestInstanceImport, newinst, pnode,
                   expnode, name)
+          # Check if starting the instance works
+          RunTest(qa_instance.TestInstanceStartup, newinst)
           RunTest(qa_instance.TestInstanceRemove, newinst)
         finally:
           qa_config.ReleaseInstance(newinst)
@@ -337,38 +383,69 @@ def RunExportImportTests(instance, pnode, snode):
       qa_config.ReleaseInstance(newinst)
 
 
-def RunDaemonTests(instance, pnode):
+def RunDaemonTests(instance):
   """Test the ganeti-watcher script.
 
   """
   RunTest(qa_daemon.TestPauseWatcher)
 
   RunTestIf("instance-automatic-restart",
-            qa_daemon.TestInstanceAutomaticRestart, pnode, instance)
+            qa_daemon.TestInstanceAutomaticRestart, instance)
   RunTestIf("instance-consecutive-failures",
-            qa_daemon.TestInstanceConsecutiveFailures, pnode, instance)
+            qa_daemon.TestInstanceConsecutiveFailures, instance)
 
   RunTest(qa_daemon.TestResumeWatcher)
 
 
+def RunSingleHomedHardwareFailureTests(instance, pnode):
+  """Test hardware failure recovery for single-homed (no secondary) instances.
+
+  """
+  if qa_config.TestEnabled("instance-recreate-disks"):
+    othernode = qa_config.AcquireNode(exclude=[pnode])
+    try:
+      RunTest(qa_instance.TestRecreateDisks,
+              instance, pnode, None, [othernode])  # None: no secondary node
+    finally:
+      qa_config.ReleaseNode(othernode)
+
+
 def RunHardwareFailureTests(instance, pnode, snode):
   """Test cluster internal hardware failure recovery.
 
   """
   RunTestIf("instance-failover", qa_instance.TestInstanceFailover, instance)
+  RunTestIf(["instance-failover", "rapi"],
+            qa_rapi.TestRapiInstanceFailover, instance)
 
   RunTestIf("instance-migrate", qa_instance.TestInstanceMigrate, instance)
   RunTestIf(["instance-migrate", "rapi"],
             qa_rapi.TestRapiInstanceMigrate, instance)
 
-  if qa_config.TestEnabled('instance-replace-disks'):
+  if qa_config.TestEnabled("instance-replace-disks"):
     othernode = qa_config.AcquireNode(exclude=[pnode, snode])
     try:
+      RunTestIf("rapi", qa_rapi.TestRapiInstanceReplaceDisks, instance)
       RunTest(qa_instance.TestReplaceDisks,
               instance, pnode, snode, othernode)
     finally:
       qa_config.ReleaseNode(othernode)
 
+  if qa_config.TestEnabled("instance-recreate-disks"):
+    othernode1 = qa_config.AcquireNode(exclude=[pnode, snode])
+    try:
+      othernode2 = qa_config.AcquireNode(exclude=[pnode, snode, othernode1])
+    except qa_error.OutOfNodesError:
+      # Let's reuse one of the nodes if the cluster is not big enough
+      othernode2 = pnode
+    try:
+      RunTest(qa_instance.TestRecreateDisks,
+              instance, pnode, snode, [othernode1, othernode2])
+    finally:
+      qa_config.ReleaseNode(othernode1)
+      if othernode2 != pnode:
+        qa_config.ReleaseNode(othernode2)
+
   RunTestIf("node-evacuate", qa_node.TestNodeEvacuate, pnode, snode)
 
   RunTestIf("node-failover", qa_node.TestNodeFailover, pnode, snode)
@@ -402,10 +479,13 @@ def RunQa():
   RunGroupListTests()
   RunGroupRwTests()
 
+  # The master shouldn't be readded or put offline; "delay" needs a non-master
+  # node to test
   pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
   try:
     RunTestIf("node-readd", qa_node.TestNodeReadd, pnode)
     RunTestIf("node-modify", qa_node.TestNodeModify, pnode)
+    RunTestIf("delay", qa_cluster.TestDelay, pnode)
   finally:
     qa_config.ReleaseNode(pnode)
 
@@ -420,22 +500,25 @@ def RunQa():
         for use_client in [True, False]:
           rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
                                   use_client)
-          RunCommonInstanceTests(rapi_instance)
+          if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
+            RunCommonInstanceTests(rapi_instance)
           RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
           del rapi_instance
 
-    if qa_config.TestEnabled('instance-add-plain-disk'):
+    if qa_config.TestEnabled("instance-add-plain-disk"):
       instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode)
       RunCommonInstanceTests(instance)
       RunGroupListTests()
-      RunTest(qa_cluster.TestClusterEpo)
+      RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
       RunExportImportTests(instance, pnode, None)
-      RunDaemonTests(instance, pnode)
+      RunDaemonTests(instance)
+      RunRepairDiskSizes()
+      RunSingleHomedHardwareFailureTests(instance, pnode)
       RunTest(qa_instance.TestInstanceRemove, instance)
       del instance
 
     multinode_tests = [
-      ('instance-add-drbd-disk',
+      ("instance-add-drbd-disk",
        qa_instance.TestInstanceAddWithDrbdDisk),
     ]
 
@@ -444,22 +527,48 @@ def RunQa():
         snode = qa_config.AcquireNode(exclude=pnode)
         try:
           instance = RunTest(func, pnode, snode)
+          RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, pnode)
+          RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, snode)
           RunCommonInstanceTests(instance)
           RunGroupListTests()
-          RunTest(qa_group.TestAssignNodesIncludingSplit,
-                  constants.INITIAL_NODE_GROUP_NAME,
-                  pnode["primary"], snode["primary"])
-          if qa_config.TestEnabled('instance-convert-disk'):
+          RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
+                    constants.INITIAL_NODE_GROUP_NAME,
+                    pnode["primary"], snode["primary"])
+          if qa_config.TestEnabled("instance-convert-disk"):
             RunTest(qa_instance.TestInstanceShutdown, instance)
             RunTest(qa_instance.TestInstanceConvertDisk, instance, snode)
             RunTest(qa_instance.TestInstanceStartup, instance)
           RunExportImportTests(instance, pnode, snode)
           RunHardwareFailureTests(instance, pnode, snode)
+          RunRepairDiskSizes()
           RunTest(qa_instance.TestInstanceRemove, instance)
           del instance
         finally:
           qa_config.ReleaseNode(snode)
 
+  finally:
+    qa_config.ReleaseNode(pnode)
+
+  # Test removing instance with offline drbd secondary
+  if qa_config.TestEnabled("instance-remove-drbd-offline"):
+    # Make sure the master is not put offline
+    snode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
+    try:
+      pnode = qa_config.AcquireNode(exclude=snode)
+      try:
+        instance = qa_instance.TestInstanceAddWithDrbdDisk(pnode, snode)
+        qa_node.MakeNodeOffline(snode, "yes")
+        try:
+          RunTest(qa_instance.TestInstanceRemove, instance)
+        finally:
+          qa_node.MakeNodeOffline(snode, "no")
+      finally:
+        qa_config.ReleaseNode(pnode)
+    finally:
+      qa_config.ReleaseNode(snode)
+
+  pnode = qa_config.AcquireNode()
+  try:
     if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
       for shutdown in [False, True]:
         instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode)
@@ -483,15 +592,15 @@ def RunQa():
   RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
 
 
-@rapi.client.UsesRapiClient
+@UsesRapiClient
 def main():
   """Main program.
 
   """
   parser = optparse.OptionParser(usage="%prog [options] <config-file>")
-  parser.add_option('--yes-do-it', dest='yes_do_it',
-      action="store_true",
-      help="Really execute the tests")
+  parser.add_option("--yes-do-it", dest="yes_do_it",
+                    action="store_true",
+                    help="Really execute the tests")
   (qa_config.options, args) = parser.parse_args()
 
   if len(args) == 1:
@@ -507,11 +616,16 @@ def main():
 
   qa_config.Load(config_file)
 
-  qa_utils.StartMultiplexer(qa_config.GetMasterNode()["primary"])
+  primary = qa_config.GetMasterNode()["primary"]
+  qa_utils.StartMultiplexer(primary)
+  print ("SSH command for primary node: %s" %
+         utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, "")))
+  print ("SSH command for other nodes: %s" %
+         utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", "")))
   try:
     RunQa()
   finally:
     qa_utils.CloseMultiplexers()
 
-if __name__ == '__main__':
+if __name__ == "__main__":
   main()