Revision e4c346a5

b/qa/ganeti-qa.py
450 450

  
451 451
  RunTestIf("node-failover", qa_node.TestNodeFailover, pnode, snode)
452 452

  
453
  RunTestIf("instance-disk-failure", qa_instance.TestInstanceMasterDiskFailure,
454
            instance, pnode, snode)
455
  RunTestIf("instance-disk-failure",
456
            qa_instance.TestInstanceSecondaryDiskFailure, instance,
457
            pnode, snode)
458

  
459 453

  
460 454
def RunExclusiveStorageTests():
461 455
  """Test exclusive storage."""
b/qa/qa-sample.json
169 169
    "instance-recreate-disks": false,
170 170

  
171 171
    "# Whether to test the tools/move-instance utility": null,
172
    "inter-cluster-instance-move": false,
173

  
174
    "# Make sure not to include the disk(s) required for Dom0 to be up": null,
175
    "# in the volume group used for instances. Otherwise the whole": null,
176
    "# system may stop working until restarted.": null,
177
    "instance-disk-failure": false
172
    "inter-cluster-instance-move": false
178 173
  },
179 174

  
180 175
  "options": {
b/qa/qa_instance.py
24 24
"""
25 25

  
26 26
import re
27
import time
28 27

  
29 28
from ganeti import utils
30 29
from ganeti import constants
......
654 653
def TestBackupListFields():
655 654
  """gnt-backup list-fields"""
656 655
  qa_utils.GenericQueryFieldsTest("gnt-backup", query.EXPORT_FIELDS.keys())
657

  
658

  
659
def _TestInstanceDiskFailure(instance, node, node2, onmaster):
  """Take the physical disks below an instance offline and recover.

  The physical disks backing the instance are looked up via
  "gnt-node volumes" on both nodes.  The disks of the selected node are
  switched to "offline" through the state file named by
  L{_GetDiskStatePath}, the instance's devices are written to a few times
  and some debugging output is collected.  The disks are always switched
  back to "running" afterwards, then the DRBD devices are detached (or
  disconnected), the disks are replaced, the instance is restarted and
  the cluster is verified.

  @param instance: instance description; its "name" entry is used
  @param node: first node; its "primary" entry is used and it receives
      the write and debugging commands
  @param node2: second node; its "primary" entry is used
  @param onmaster: if true the disks on C{node} are failed (reported as
      "master"), otherwise those on C{node2} (reported as "secondary")
  @raise qa_error.Error: if a physical volume name has an unexpected
      format or no physical disk is found on the selected node

  """
  master = qa_config.GetMasterNode()
  quote = utils.ShellQuoteArgs

  instance_full = qa_utils.ResolveInstanceName(instance["name"])
  node_full = qa_utils.ResolveNodeName(node)
  node2_full = qa_utils.ResolveNodeName(node2)

  print(qa_utils.FormatInfo("Getting physical disk names"))
  volumes_cmd = ["gnt-node", "volumes", "--separator=|", "--no-headers",
                 "--output=node,phys,instance",
                 node["primary"], node2["primary"]]
  volumes_out = qa_utils.GetCommandOutput(master["primary"],
                                          quote(volumes_cmd))

  # Map each node name to the base names (partition number stripped) of
  # the physical disks holding volumes of this instance
  disk_re = re.compile(r"^/dev/([a-z]+)\d+$")
  node2disk = {}
  for line in volumes_out.splitlines():
    (owner, phys, inst) = line.split("|")
    if inst != instance_full:
      continue

    known = node2disk.setdefault(owner, [])

    match = disk_re.match(phys)
    if match is None:
      raise qa_error.Error("Unknown disk name format: %s" % phys)

    base = match.group(1)
    if base not in known:
      known.append(base)

  # Index 1 selects the first node ("master"), index 0 the second one
  target_idx = int(onmaster)
  target_full = [node2_full, node_full][target_idx]
  target_node = [node2, node][target_idx]

  if target_full not in node2disk:
    raise qa_error.Error("Couldn't find physical disks used on"
                         " %s node" % ["secondary", "master"][target_idx])

  print(qa_utils.FormatInfo("Checking whether nodes have ability to stop"
                            " disks"))
  for node_name, disks in node2disk.iteritems():
    checks = [quote(["test", "-f", _GetDiskStatePath(disk)])
              for disk in disks]
    AssertCommand(" && ".join(checks), node=node_name)

  print(qa_utils.FormatInfo("Getting device paths"))
  activate_cmd = ["gnt-instance", "activate-disks", instance["name"]]
  activate_out = qa_utils.GetCommandOutput(master["primary"],
                                           quote(activate_cmd))
  # Every output line must consist of exactly three ":"-separated fields;
  # the last one is the device path
  devpath = [path
             for (_, _, path) in (line.split(":")
                                  for line in activate_out.splitlines())]
  print(devpath)

  print(qa_utils.FormatInfo("Getting drbd device paths"))
  info_cmd = ["gnt-instance", "info", instance["name"]]
  info_out = qa_utils.GetCommandOutput(master["primary"], quote(info_cmd))
  drbd_re = (r"\s+-\s+sd[a-z]+,\s+type:\s+drbd8?,\s+.*$"
             r"\s+primary:\s+(/dev/drbd\d+)\s+")
  drbddevs = re.findall(drbd_re, info_out, re.M)
  print(drbddevs)

  halted_disks = []
  try:
    print(qa_utils.FormatInfo("Deactivating disks"))
    offline_cmds = []
    for name in node2disk[target_full]:
      # Record the disk before the command is even built, so that the
      # cleanup below switches it back on in any case
      halted_disks.append(name)
      offline_cmds.append(quote(["echo", "offline"]) +
                          " >%s" % _GetDiskStatePath(name))
    AssertCommand(" && ".join(offline_cmds), node=target_node)

    print(qa_utils.FormatInfo("Write to disks and give some time to notice"
                              " the problem"))
    write_cmd = " && ".join([quote(["dd", "count=1", "bs=512",
                                    "conv=notrunc",
                                    "if=%s" % disk, "of=%s" % disk])
                             for disk in devpath])
    for _ in range(3):
      AssertCommand(write_cmd, node=node)
      time.sleep(3)

    print(qa_utils.FormatInfo("Debugging info"))
    for dev in drbddevs:
      AssertCommand(["drbdsetup", dev, "show"], node=node)

    AssertCommand(["gnt-instance", "info", instance["name"]])

  finally:
    print(qa_utils.FormatInfo("Activating disks again"))
    # Joined with ";" so that one failing disk doesn't keep the remaining
    # ones from being switched back on
    online_cmds = [quote(["echo", "running"]) +
                   " >%s" % _GetDiskStatePath(name)
                   for name in halted_disks]
    AssertCommand("; ".join(online_cmds), node=target_node)

  if onmaster:
    (drbd_action, drbd_node) = ("detach", node)
  else:
    (drbd_action, drbd_node) = ("disconnect", node2)
  for dev in drbddevs:
    AssertCommand(["drbdsetup", dev, drbd_action], node=drbd_node)

  # TODO
  #AssertCommand(["vgs"], [node2, node][int(onmaster)])

  print(qa_utils.FormatInfo("Making sure disks are up again"))
  AssertCommand(["gnt-instance", "replace-disks", instance["name"]])

  print(qa_utils.FormatInfo("Restarting instance"))
  for action in ("shutdown", "startup"):
    AssertCommand(["gnt-instance", action, instance["name"]])

  AssertCommand(["gnt-cluster", "verify"])
770

  
771

  
772
def TestInstanceMasterDiskFailure(instance, node, node2):
  """Disk failure test for the primary ("master") node; disabled.

  Only prints an error notice saying that the test cannot be run.  The
  arguments are accepted but not used.

  """
  # pylint: disable=W0613
  # (the arguments stay unused for as long as the test is disabled)
  notice = ("Disk failure on primary node cannot be"
            " tested due to potential crashes.")
  print(qa_utils.FormatError(notice))
  # Running the real test can cause crashes; it stays disabled until fixed
  #return _TestInstanceDiskFailure(instance, node, node2, True)
780

  
781

  
782
def TestInstanceSecondaryDiskFailure(instance, node, node2):
  """Run the disk failure test against the second node.

  Delegates to L{_TestInstanceDiskFailure} with C{onmaster} set to False
  and returns its result.

  """
  return _TestInstanceDiskFailure(instance, node, node2, onmaster=False)

Also available in: Unified diff