Add QA test for verify-disks with broken DRBD
author Thomas Thrainer <thomasth@google.com>
Tue, 2 Jul 2013 07:00:59 +0000 (09:00 +0200)
committer Thomas Thrainer <thomasth@google.com>
Tue, 2 Jul 2013 13:54:55 +0000 (15:54 +0200)
gnt-cluster verify-disks supports automatic activation of broken DRBD
disks. This new QA test verifies that it actually finds instances with
broken DRBD disks and activates the disks of those instances.

Signed-off-by: Thomas Thrainer <thomasth@google.com>
Reviewed-by: Klaus Aehlig <aehlig@google.com>

qa/ganeti-qa.py
qa/qa_cluster.py
qa/qa_instance.py

index ab1c1b6..1605a9e 100755 (executable)
@@ -273,7 +273,7 @@ def RunOsTests():
     RunTestIf(os_enabled, fn)
 
 
-def RunCommonInstanceTests(instance):
+def RunCommonInstanceTests(instance, inst_nodes):
   """Runs a few tests that are common to all disk types.
 
   """
@@ -346,6 +346,9 @@ def RunCommonInstanceTests(instance):
 
   RunTestIf("tags", qa_tags.TestInstanceTags, instance)
 
+  if instance.disk_template == constants.DT_DRBD8:
+    RunTestIf("cluster-verify",
+              qa_cluster.TestClusterVerifyDisksBrokenDRBD, instance, inst_nodes)
   RunTestIf("cluster-verify", qa_cluster.TestClusterVerify)
 
   RunTestIf(qa_rapi.Enabled, qa_rapi.TestInstance, instance)
@@ -729,7 +732,7 @@ def RunInstanceTests():
             RunTest(qa_instance.TestInstanceStartup, instance)
           RunTestIf("instance-modify-disks",
                     qa_instance.TestInstanceModifyDisks, instance)
-          RunCommonInstanceTests(instance)
+          RunCommonInstanceTests(instance, inodes)
           if qa_config.TestEnabled("instance-modify-primary"):
             othernode = qa_config.AcquireNode()
             RunTest(qa_instance.TestInstanceModifyPrimaryAndBack,
@@ -802,7 +805,7 @@ def RunQa():
                                   use_client)
           try:
             if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
-              RunCommonInstanceTests(rapi_instance)
+              RunCommonInstanceTests(rapi_instance, [pnode])
             RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
           finally:
             rapi_instance.Release()
index 3ea7263..838e2df 100644 (file)
@@ -33,6 +33,7 @@ from ganeti import utils
 from ganeti import pathutils
 
 import qa_config
+import qa_daemon
 import qa_utils
 import qa_error
 import qa_instance
@@ -347,6 +348,44 @@ def TestClusterVerify():
   AssertCommand(["gnt-cluster", "verify-disks"])
 
 
+def TestClusterVerifyDisksBrokenDRBD(instance, inst_nodes):
+  """gnt-cluster verify-disks with broken DRBD"""
+  qa_daemon.TestPauseWatcher()
+
+  try:
+    info = qa_instance.GetInstanceInfo(instance.name)
+    snode = inst_nodes[1]
+    for idx, minor in enumerate(info["drbd-minors"][snode.primary]):
+      if idx % 2 == 0:
+        break_drbd_cmd = \
+          "(drbdsetup %d down >/dev/null 2>&1;" \
+          " drbdsetup down resource%d >/dev/null 2>&1) || /bin/true" % \
+          (minor, minor)
+      else:
+        break_drbd_cmd = \
+          "(drbdsetup %d detach >/dev/null 2>&1;" \
+          " drbdsetup detach %d >/dev/null 2>&1) || /bin/true" % \
+          (minor, minor)
+      AssertCommand(break_drbd_cmd, node=snode)
+
+    verify_output = GetCommandOutput(qa_config.GetMasterNode().primary,
+                                     "gnt-cluster verify-disks")
+    activation_msg = "Activating disks for instance '%s'" % instance.name
+    if activation_msg not in verify_output:
+      raise qa_error.Error("gnt-cluster verify-disks did not activate broken"
+                           " DRBD disks:\n%s" % verify_output)
+
+    verify_output = GetCommandOutput(qa_config.GetMasterNode().primary,
+                                     "gnt-cluster verify-disks")
+    if activation_msg in verify_output:
+      raise qa_error.Error("gnt-cluster verify-disks wants to activate broken"
+                           " DRBD disks on second attempt:\n%s" % verify_output)
+
+    AssertCommand(_CLUSTER_VERIFY)
+  finally:
+    qa_daemon.TestResumeWatcher()
+
+
 def TestJobqueue():
   """gnt-debug test-jobqueue"""
   AssertCommand(["gnt-debug", "test-jobqueue"])
index e1ede85..626e3fc 100644 (file)
@@ -48,7 +48,7 @@ def _GetDiskStatePath(disk):
   return "/sys/block/%s/device/state" % disk
 
 
-def _GetInstanceInfo(instance):
+def GetInstanceInfo(instance):
   """Return information about the actual state of an instance.
 
   @type instance: string
@@ -129,7 +129,7 @@ def _DestroyInstanceDisks(instance):
   @param instance: the instance
 
   """
-  info = _GetInstanceInfo(instance.name)
+  info = GetInstanceInfo(instance.name)
   # FIXME: destruction/removal should be part of the disk class
   if info["storage-type"] == constants.ST_LVM_VG:
     vols = info["volumes"]
@@ -357,7 +357,7 @@ def TestInstanceRenameAndBack(rename_source, rename_target):
   finally:
     qa_utils.RemoveFromEtcHosts(["meeeeh-not-exists", rename_target])
 
-  info = _GetInstanceInfo(rename_source)
+  info = GetInstanceInfo(rename_source)
 
   # Check instance volume tags correctly updated. Note that this check is lvm
   # specific, so we skip it for non-lvm-based instances.
@@ -972,7 +972,7 @@ def TestRemoveInstanceOfflineNode(instance, snode, set_offline, set_online):
   @param set_online: function to call to set the node on-line
 
   """
-  info = _GetInstanceInfo(instance.name)
+  info = GetInstanceInfo(instance.name)
   set_offline(snode)
   try:
     TestInstanceRemove(instance)
@@ -991,7 +991,8 @@ def TestRemoveInstanceOfflineNode(instance, snode, set_offline, set_online):
         # syntax), we always have to perform both commands and ignore the
         # output.
         drbd_shutdown_cmd = \
-          "(drbdsetup %d down && drbdsetup down resource%d) || /bin/true" % \
+          "(drbdsetup %d down >/dev/null 2>&1;" \
+          " drbdsetup down resource%d >/dev/null 2>&1) || /bin/true" % \
             (minor, minor)
         AssertCommand(drbd_shutdown_cmd, node=snode)
       AssertCommand(["lvremove", "-f"] + info["volumes"], node=snode)