Cleanup QA scripts.
author Michael Hanselmann <hansmi@google.com>
Thu, 1 Nov 2007 13:54:42 +0000 (13:54 +0000)
committer Michael Hanselmann <hansmi@google.com>
Thu, 1 Nov 2007 13:54:42 +0000 (13:54 +0000)
- Split main() function into several small ones.
- Current work on disk failure tests. This is not yet finished.
- Fix small typo in qa_node.py.

Reviewed-by: schreiberal

qa/ganeti-qa.py
qa/qa_instance.py
qa/qa_node.py
qa/qa_other.py

index 3c85242..306eccb 100755 (executable)
@@ -60,44 +60,32 @@ def RunTest(fn, *args):
   return fn(*args)
 
 
-def main():
-  """Main program.
+def RunEnvTests():
+  """Run several environment tests.
 
   """
-  parser = OptionParser(usage="%prog [options] <config-file> "
-                              "<known-hosts-file>")
-  parser.add_option('--dry-run', dest='dry_run',
-      action="store_true",
-      help="Show what would be done")
-  parser.add_option('--yes-do-it', dest='yes_do_it',
-      action="store_true",
-      help="Really execute the tests")
-  (qa_config.options, args) = parser.parse_args()
+  if not qa_config.TestEnabled('env'):
+    return
 
-  if len(args) == 2:
-    (config_file, known_hosts_file) = args
-  else:
-    parser.error("Not enough arguments.")
-
-  if not qa_config.options.yes_do_it:
-    print ("Executing this script irreversibly destroys any Ganeti\n"
-           "configuration on all nodes involved. If you really want\n"
-           "to start testing, supply the --yes-do-it option.")
-    sys.exit(1)
-
-  qa_config.Load(config_file)
+  RunTest(qa_env.TestSshConnection)
+  RunTest(qa_env.TestIcmpPing)
+  RunTest(qa_env.TestGanetiCommands)
 
-  RunTest(qa_other.TestUploadKnownHostsFile, known_hosts_file)
 
-  if qa_config.TestEnabled('env'):
-    RunTest(qa_env.TestSshConnection)
-    RunTest(qa_env.TestIcmpPing)
-    RunTest(qa_env.TestGanetiCommands)
+def SetupCluster():
+  """Initializes the cluster.
 
+  """
   RunTest(qa_cluster.TestClusterInit)
-
   RunTest(qa_node.TestNodeAddAll)
+  if qa_config.TestEnabled('node-info'):
+    RunTest(qa_node.TestNodeInfo)
+
+
+def RunClusterTests():
+  """Runs tests related to gnt-cluster.
 
+  """
   if qa_config.TestEnabled('cluster-verify'):
     RunTest(qa_cluster.TestClusterVerify)
 
@@ -113,134 +101,178 @@ def main():
   if qa_config.TestEnabled('cluster-copyfile'):
     RunTest(qa_cluster.TestClusterCopyfile)
 
-  if qa_config.TestEnabled('node-info'):
-    RunTest(qa_node.TestNodeInfo)
-
   if qa_config.TestEnabled('cluster-burnin'):
     RunTest(qa_cluster.TestClusterBurnin)
 
   if qa_config.TestEnabled('cluster-master-failover'):
     RunTest(qa_cluster.TestClusterMasterFailover)
 
-  if qa_config.TestEnabled('os'):
-    RunTest(qa_os.TestOsList)
-    RunTest(qa_os.TestOsDiagnose)
-    RunTest(qa_os.TestOsValid)
-    RunTest(qa_os.TestOsInvalid)
-    RunTest(qa_os.TestOsPartiallyValid)
 
-  node = qa_config.AcquireNode()
-  try:
-    if qa_config.TestEnabled('instance-add-plain-disk'):
-      instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, node)
+def RunOsTests():
+  """Runs all tests related to gnt-os.
 
-      if qa_config.TestEnabled('instance-shutdown'):
-        RunTest(qa_instance.TestInstanceShutdown, instance)
-        RunTest(qa_instance.TestInstanceStartup, instance)
+  """
+  if not qa_config.TestEnabled('os'):
+    return
+
+  RunTest(qa_os.TestOsList)
+  RunTest(qa_os.TestOsDiagnose)
+  RunTest(qa_os.TestOsValid)
+  RunTest(qa_os.TestOsInvalid)
+  RunTest(qa_os.TestOsPartiallyValid)
+
+
+def RunCommonInstanceTests(instance):
+  """Runs a few tests that are common to all disk types.
+
+  """
+  if qa_config.TestEnabled('instance-shutdown'):
+    RunTest(qa_instance.TestInstanceShutdown, instance)
+    RunTest(qa_instance.TestInstanceStartup, instance)
 
-      if qa_config.TestEnabled('instance-list'):
-        RunTest(qa_instance.TestInstanceList)
+  if qa_config.TestEnabled('instance-list'):
+    RunTest(qa_instance.TestInstanceList)
 
-      if qa_config.TestEnabled('instance-info'):
-        RunTest(qa_instance.TestInstanceInfo, instance)
+  if qa_config.TestEnabled('instance-info'):
+    RunTest(qa_instance.TestInstanceInfo, instance)
 
-      automatic_restart = \
-        qa_config.TestEnabled('instance-automatic-restart')
-      consecutive_failures = \
-        qa_config.TestEnabled('instance-consecutive-failures')
+  if qa_config.TestEnabled('instance-reinstall'):
+    RunTest(qa_instance.TestInstanceShutdown, instance)
+    RunTest(qa_instance.TestInstanceReinstall, instance)
+    RunTest(qa_instance.TestInstanceStartup, instance)
 
-      if automatic_restart or consecutive_failures:
-        qa_daemon.PrintCronWarning()
+  if qa_config.TestEnabled('node-volumes'):
+    RunTest(qa_node.TestNodeVolumes)
 
-        if automatic_restart:
-          RunTest(qa_daemon.TestInstanceAutomaticRestart, node, instance)
 
-        if consecutive_failures:
-          RunTest(qa_daemon.TestInstanceConsecutiveFailures, node, instance)
+def RunExportImportTests(instance, pnode):
+  """Tries to export and import the instance.
 
-      if qa_config.TestEnabled('instance-export'):
-        expnode = qa_config.AcquireNode(exclude=node)
+  """
+  if qa_config.TestEnabled('instance-export'):
+    expnode = qa_config.AcquireNode(exclude=pnode)
+    try:
+      name = RunTest(qa_instance.TestInstanceExport, instance, expnode)
+
+      RunTest(qa_instance.TestBackupList, expnode)
+
+      if qa_config.TestEnabled('instance-import'):
+        newinst = qa_config.AcquireInstance()
         try:
-          name = RunTest(qa_instance.TestInstanceExport, instance, expnode)
-
-          RunTest(qa_instance.TestBackupList, expnode)
-
-          if qa_config.TestEnabled('instance-import'):
-            newinst = qa_config.AcquireInstance()
-            try:
-              RunTest(qa_instance.TestInstanceImport, node, newinst,
-                      expnode, name)
-              RunTest(qa_instance.TestInstanceRemove, newinst)
-            finally:
-              qa_config.ReleaseInstance(newinst)
+          RunTest(qa_instance.TestInstanceImport, pnode, newinst,
+                  expnode, name)
+          RunTest(qa_instance.TestInstanceRemove, newinst)
         finally:
-          qa_config.ReleaseNode(expnode)
+          qa_config.ReleaseInstance(newinst)
+    finally:
+      qa_config.ReleaseNode(expnode)
 
-      if qa_config.TestEnabled('instance-reinstall'):
-        RunTest(qa_instance.TestInstanceShutdown, instance)
-        RunTest(qa_instance.TestInstanceReinstall, instance)
-        RunTest(qa_instance.TestInstanceStartup, instance)
 
-      if qa_config.TestEnabled('node-volumes'):
-        RunTest(qa_node.TestNodeVolumes)
+def RunDaemonTests(instance, pnode):
+  """Test the ganeti-watcher script.
 
-      RunTest(qa_instance.TestInstanceRemove, instance)
-      del instance
+  """
+  automatic_restart = \
+    qa_config.TestEnabled('instance-automatic-restart')
+  consecutive_failures = \
+    qa_config.TestEnabled('instance-consecutive-failures')
 
-    if qa_config.TestEnabled('instance-add-local-mirror-disk'):
-      instance = RunTest(qa_instance.TestInstanceAddWithLocalMirrorDisk, node)
+  if automatic_restart or consecutive_failures:
+    qa_daemon.PrintCronWarning()
 
-      if qa_config.TestEnabled('instance-shutdown'):
-        RunTest(qa_instance.TestInstanceShutdown, instance)
-        RunTest(qa_instance.TestInstanceStartup, instance)
+    if automatic_restart:
+      RunTest(qa_daemon.TestInstanceAutomaticRestart, pnode, instance)
 
-      if qa_config.TestEnabled('instance-info'):
-        RunTest(qa_instance.TestInstanceInfo, instance)
+    if consecutive_failures:
+      RunTest(qa_daemon.TestInstanceConsecutiveFailures, pnode, instance)
 
-      if qa_config.TestEnabled('node-volumes'):
-        RunTest(qa_node.TestNodeVolumes)
 
-      RunTest(qa_instance.TestInstanceRemove, instance)
-      del instance
+def RunHardwareFailureTests(instance, pnode, snode):
+  """Test cluster internal hardware failure recovery.
 
-    if qa_config.TestEnabled('instance-add-remote-raid-disk'):
-      node2 = qa_config.AcquireNode(exclude=node)
-      try:
-        instance = RunTest(qa_instance.TestInstanceAddWithRemoteRaidDisk,
-                           node, node2)
+  """
+  if qa_config.TestEnabled('instance-failover'):
+    RunTest(qa_instance.TestInstanceFailover, instance)
+
+  if qa_config.TestEnabled('node-evacuate'):
+    RunTest(qa_node.TestNodeEvacuate, pnode, snode)
+
+  if qa_config.TestEnabled('node-failover'):
+    RunTest(qa_node.TestNodeFailover, pnode, snode)
+
+  if qa_config.TestEnabled('instance-disk-failure'):
+    RunTest(qa_instance.TestInstanceMasterDiskFailure,
+            instance, pnode, snode)
+    RunTest(qa_instance.TestInstanceSecondaryDiskFailure,
+            instance, pnode, snode)
+
+
+def main():
+  """Main program.
+
+  """
+  parser = OptionParser(usage="%prog [options] <config-file> "
+                              "<known-hosts-file>")
+  parser.add_option('--dry-run', dest='dry_run',
+      action="store_true",
+      help="Show what would be done")
+  parser.add_option('--yes-do-it', dest='yes_do_it',
+      action="store_true",
+      help="Really execute the tests")
+  (qa_config.options, args) = parser.parse_args()
 
-        if qa_config.TestEnabled('instance-shutdown'):
-          RunTest(qa_instance.TestInstanceShutdown, instance)
-          RunTest(qa_instance.TestInstanceStartup, instance)
+  if len(args) == 2:
+    (config_file, known_hosts_file) = args
+  else:
+    parser.error("Not enough arguments.")
 
-        if qa_config.TestEnabled('instance-info'):
-          RunTest(qa_instance.TestInstanceInfo, instance)
+  if not qa_config.options.yes_do_it:
+    print ("Executing this script irreversibly destroys any Ganeti\n"
+           "configuration on all nodes involved. If you really want\n"
+           "to start testing, supply the --yes-do-it option.")
+    sys.exit(1)
 
-        if qa_config.TestEnabled('instance-failover'):
-          RunTest(qa_instance.TestInstanceFailover, instance)
+  qa_config.Load(config_file)
 
-        if qa_config.TestEnabled('node-evacuate'):
-          RunTest(qa_node.TestNodeEvacuate, node, node2)
+  RunTest(qa_other.UploadKnownHostsFile, known_hosts_file)
 
-        if qa_config.TestEnabled('node-failover'):
-          RunTest(qa_node.TestNodeFailover, node, node2)
+  RunEnvTests()
+  SetupCluster()
+  RunClusterTests()
+  RunOsTests()
 
-        if qa_config.TestEnabled('node-volumes'):
-          RunTest(qa_node.TestNodeVolumes)
+  pnode = qa_config.AcquireNode()
+  try:
+    if qa_config.TestEnabled('instance-add-plain-disk'):
+      instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode)
+      RunCommonInstanceTests(instance)
+      RunExportImportTests(instance, pnode)
+      RunDaemonTests(instance, pnode)
+      RunTest(qa_instance.TestInstanceRemove, instance)
+      del instance
 
-        if qa_config.TestEnabled('instance-disk-failure'):
-          RunTest(qa_instance.TestInstanceMasterDiskFailure,
-                  instance, node, node2)
-          RunTest(qa_instance.TestInstanceSecondaryDiskFailure,
-                  instance, node, node2)
+    if qa_config.TestEnabled('instance-add-local-mirror-disk'):
+      instance = RunTest(qa_instance.TestInstanceAddWithLocalMirrorDisk, pnode)
+      RunCommonInstanceTests(instance)
+      RunExportImportTests(instance, pnode)
+      RunTest(qa_instance.TestInstanceRemove, instance)
+      del instance
 
+    if qa_config.TestEnabled('instance-add-remote-raid-disk'):
+      snode = qa_config.AcquireNode(exclude=pnode)
+      try:
+        instance = RunTest(qa_instance.TestInstanceAddWithRemoteRaidDisk,
+                           pnode, snode)
+        RunCommonInstanceTests(instance)
+        RunExportImportTests(instance, pnode)
+        RunHardwareFailureTests(instance, pnode, snode)
         RunTest(qa_instance.TestInstanceRemove, instance)
         del instance
       finally:
-        qa_config.ReleaseNode(node2)
+        qa_config.ReleaseNode(snode)
 
   finally:
-    qa_config.ReleaseNode(node)
+    qa_config.ReleaseNode(pnode)
 
   RunTest(qa_node.TestNodeRemoveAll)
 
index ec17d70..39a5e4f 100644 (file)
@@ -239,15 +239,16 @@ def _TestInstanceDiskFailure(instance, node, node2, onmaster):
              r'\s+primary:\s+(/dev/drbd\d+)\s+')
   drbddevs = re.findall(pattern, output, re.M)
 
-  # Deactivate disks on secondary node
   halted_disks = []
-  cmds = []
-  for name in node2disk[[node2_full, node_full][int(onmaster)]]:
-    halted_disks.append(name)
-    cmds.append(sq(["echo", "offline"]) + " >%s" % _GetDiskStatePath(name))
-  AssertEqual(StartSSH([node2, node][int(onmaster)]['primary'],
-                       '; '.join(cmds)).wait(), 0)
   try:
+    # Deactivate disks
+    cmds = []
+    for name in node2disk[[node2_full, node_full][int(onmaster)]]:
+      halted_disks.append(name)
+      cmds.append(sq(["echo", "offline"]) + " >%s" % _GetDiskStatePath(name))
+    AssertEqual(StartSSH([node2, node][int(onmaster)]['primary'],
+                         ' && '.join(cmds)).wait(), 0)
+
     # Write something to the disks and give some time to notice the problem
     cmds = []
     for disk in devpath:
@@ -257,6 +258,10 @@ def _TestInstanceDiskFailure(instance, node, node2, onmaster):
       AssertEqual(StartSSH(node['primary'], ' && '.join(cmds)).wait(), 0)
       time.sleep(3)
 
+    for name in drbddevs:
+      cmd = ['drbdsetup', name, 'show']
+      AssertEqual(StartSSH(node['primary'], sq(cmd)).wait(), 0)
+
     # For manual checks
     cmd = ['gnt-instance', 'info', instance['name']]
     AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)
@@ -269,15 +274,25 @@ def _TestInstanceDiskFailure(instance, node, node2, onmaster):
     AssertEqual(StartSSH([node2, node][int(onmaster)]['primary'],
                          '; '.join(cmds)).wait(), 0)
 
+  if onmaster:
+    for name in drbddevs:
+      cmd = ['drbdsetup', name, 'detach']
+      AssertEqual(StartSSH(node['primary'], sq(cmd)).wait(), 0)
+  else:
+    for name in drbddevs:
+      cmd = ['drbdsetup', name, 'disconnect']
+      AssertEqual(StartSSH(node2['primary'], sq(cmd)).wait(), 0)
+
+  # Make sure disks are up again
+  #cmd = ['gnt-instance', 'activate-disks', instance['name']]
+  #AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)
+
   # Restart instance
   cmd = ['gnt-instance', 'shutdown', instance['name']]
   AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)
 
-  cmd = ['gnt-instance', 'startup', '--force', instance['name']]
-  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)
-
-  # Make sure disks are up again
-  cmd = ['gnt-instance', 'activate-disks', instance['name']]
+  #cmd = ['gnt-instance', 'startup', '--force', instance['name']]
+  cmd = ['gnt-instance', 'startup', instance['name']]
   AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)
 
   cmd = ['gnt-cluster', 'verify']
@@ -289,7 +304,7 @@ def TestInstanceMasterDiskFailure(instance, node, node2):
   qa_utils.PrintError("Disk failure on primary node cannot be "
                       "tested due to potential crashes.")
   # The following can cause crashes, thus it's disabled until fixed
-  #return _TestInstanceDiskFailure(instance, node, node2, True)
+  return _TestInstanceDiskFailure(instance, node, node2, True)
 
 
 def TestInstanceSecondaryDiskFailure(instance, node, node2):
index 968b7f4..5691511 100644 (file)
@@ -89,9 +89,9 @@ def TestNodeFailover(node, node2):
   master = qa_config.GetMasterNode()
 
   if qa_utils.GetNodeInstances(node2, secondaries=False):
-    raise qa_errors.UnusableNodeError("Secondary node has at least one "
-                                      "primary instance. This test requires "
-                                      "it to have no primary instances.")
+    raise qa_error.UnusableNodeError("Secondary node has at least one "
+                                     "primary instance. This test requires "
+                                     "it to have no primary instances.")
 
   # Fail over to secondary node
   cmd = ['gnt-node', 'failover', '-f', node['primary']]
@@ -111,9 +111,9 @@ def TestNodeEvacuate(node, node2):
   node3 = qa_config.AcquireNode(exclude=[node, node2])
   try:
     if qa_utils.GetNodeInstances(node3, secondaries=True):
-      raise qa_errors.UnusableNodeError("Evacuation node has at least one "
-                                        "secondary instance. This test requires "
-                                        "it to have no secondary instances.")
+      raise qa_error.UnusableNodeError("Evacuation node has at least one "
+                                       "secondary instance. This test requires "
+                                       "it to have no secondary instances.")
 
     # Evacuate all secondary instances
     cmd = ['gnt-node', 'evacuate', '-f', node2['primary'], node3['primary']]
index 6882254..d349a06 100644 (file)
@@ -25,7 +25,7 @@ import qa_utils
 from qa_utils import AssertEqual, StartSSH
 
 
-def TestUploadKnownHostsFile(localpath):
+def UploadKnownHostsFile(localpath):
   """Uploading known_hosts file.
 
   """