4 # Copyright (C) 2007, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Instance related QA tests.
29 from ganeti import utils
30 from ganeti import constants
31 from ganeti import query
37 from qa_utils import AssertIn, AssertCommand, AssertEqual
38 from qa_utils import InstanceCheck, INST_DOWN, INST_UP, FIRST_ARG, RETURN_VALUE
41 def _GetDiskStatePath(disk):
42 return "/sys/block/%s/device/state" % disk
# Builds generic command-line parameters for creating/importing an instance:
# a backend memory specification built from the BE_MINMEM and BE_MAXMEM
# values of the QA configuration, one "--disk <idx>:size=<size>" pair per
# entry of the "disk" setting, and a "--net 0:mac=<mac>" option.
# NOTE(review): the embedded numbering skips 46, 49, 53, 55-57, 59 and 61+,
# so the initialisation of `params`, the use of `force_mac`, whatever guards
# the "--net" option and the return statement are not visible in this
# extract -- confirm against the complete file before changing anything.
45 def _GetGenericAddParameters(inst, force_mac=None):
47 params.append("%s=%s,%s=%s" % (constants.BE_MINMEM,
48 qa_config.get(constants.BE_MINMEM),
50 qa_config.get(constants.BE_MAXMEM)))
# One "--disk" option per configured disk, indexed from 0 by enumerate()
51 for idx, size in enumerate(qa_config.get("disk")):
52 params.extend(["--disk", "%s:size=%s" % (idx, size)])
54 # Set static MAC address if configured
58 nic0_mac = qa_config.GetInstanceNicMac(inst)
60 params.extend(["--net", "0:mac=%s" % nic0_mac])
# Acquires an instance from the QA configuration and assembles a
# "gnt-instance add" command for it using the configured "os" value, the
# given disk template and the parameters from _GetGenericAddParameters; the
# instance name is appended last. The ssconf instance list is then checked
# for the instance name.
# NOTE(review): the embedded numbering skips 67, 71, 74-76 and 78-80, so the
# use of the `node` argument, the execution of `cmd`, the return value and
# the condition under which ReleaseInstance runs are not visible here.
65 def _DiskTest(node, disk_template):
66 instance = qa_config.AcquireInstance()
68 cmd = (["gnt-instance", "add",
69 "--os-type=%s" % qa_config.get("os"),
70 "--disk-template=%s" % disk_template,
72 _GetGenericAddParameters(instance))
73 cmd.append(instance["name"])
77 _CheckSsconfInstanceList(instance["name"])
81 qa_config.ReleaseInstance(instance)
# Runs "gnt-instance info" for the instance on the master node, parses the
# output for node lines ("primary:"/"secondaries:" list items) and for
# "logical_id:" lines, and runs "lvremove -f" with the collected volumes.
# NOTE(review): the embedded numbering skips several lines (87, 90-91, 98,
# 104-105, 108-109, 111, 113, 116, 118-120); the initialisation of `nodes`
# and `vols`, the conditionals around the regex matches, the assignment of
# `nodestr` and the loop that binds `node` are not visible in this extract.
85 def _DestroyInstanceVolumes(instance):
86 """Remove all the LVM volumes of an instance.
88 This is used to simulate HW errors (dead nodes, broken disks...); the
89 configuration of the instance is not affected.
92 master = qa_config.GetMasterNode()
93 infocmd = utils.ShellQuoteArgs(["gnt-instance", "info", instance["name"]])
94 info_out = qa_utils.GetCommandOutput(master["primary"], infocmd)
# Captures the text following "- primary:" or "- secondaries:"
95 re_node = re.compile(r"^\s+-\s+(?:primary|secondaries):\s+(\S.+)$")
96 node_elem = r"([^,()]+)(?:\s+\([^)]+\))?"
97 # re_nodelist matches a list of nodes returned by gnt-instance info, e.g.:
99 # node2.fqdn,node3.fqdn
100 # node4.fqdn (group mygroup, group UUID 01234567-abcd-0123-4567-0123456789ab)
101 # FIXME This works with no more than 2 secondaries
102 re_nodelist = re.compile(node_elem + "(?:," + node_elem + ")?$")
# Captures the value of each "logical_id:" line
103 re_vol = re.compile(r"^\s+logical_id:\s+(\S+)$")
106 for line in info_out.splitlines():
107 m = re_node.match(line)
110 m2 = re_nodelist.match(nodestr)
112 nodes.extend(filter(None, m2.groups()))
114 nodes.append(nodestr)
115 m = re_vol.match(line)
117 vols.append(m.group(1))
# All collected volumes are removed with a single forced lvremove call
121 AssertCommand(["lvremove", "-f"] + vols, node=node)
@InstanceCheck(None, INST_UP, RETURN_VALUE)
def TestInstanceAddWithPlainDisk(node):
  """gnt-instance add -t plain

  @param node: Node entry; its C{"primary"} value is handed to
    L{_DiskTest} as the node argument
  @return: The result of L{_DiskTest} for the C{"plain"} disk template

  """
  primary_name = node["primary"]
  return _DiskTest(primary_name, "plain")
# Calls _DiskTest with "<node primary>:<node2 primary>" as its first
# argument and returns the result.
# NOTE(review): the call is cut off after the first argument (the embedded
# numbering skips 134); the remaining argument is presumably the disk
# template named in the docstring -- confirm against the complete file.
130 @InstanceCheck(None, INST_UP, RETURN_VALUE)
131 def TestInstanceAddWithDrbdDisk(node, node2):
132 """gnt-instance add -t drbd"""
133 return _DiskTest("%s:%s" % (node["primary"], node2["primary"]),
@InstanceCheck(None, INST_DOWN, FIRST_ARG)
def TestInstanceRemove(instance):
  """gnt-instance remove

  Removes the instance with C{-f} and afterwards hands it back through
  C{qa_config.ReleaseInstance}.

  @param instance: Instance entry; its C{"name"} value is used on the
    command line

  """
  remove_cmd = ["gnt-instance", "remove", "-f"]
  remove_cmd.append(instance["name"])
  AssertCommand(remove_cmd)

  qa_config.ReleaseInstance(instance)
@InstanceCheck(INST_DOWN, INST_UP, FIRST_ARG)
def TestInstanceStartup(instance):
  """gnt-instance startup

  @param instance: Instance entry; its C{"name"} value names the instance
    that is started

  """
  inst_name = instance["name"]
  AssertCommand(["gnt-instance", "startup", inst_name])
@InstanceCheck(INST_UP, INST_DOWN, FIRST_ARG)
def TestInstanceShutdown(instance):
  """gnt-instance shutdown

  @param instance: Instance entry; its C{"name"} value names the instance
    that is shut down

  """
  shutdown_cmd = ["gnt-instance", "shutdown"]
  shutdown_cmd.append(instance["name"])
  AssertCommand(shutdown_cmd)
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestInstanceReboot(instance):
  """gnt-instance reboot

  Reboots the instance once for every configured reboot type. Afterwards the
  instance is shut down, rebooted without an explicit type, and its status as
  reported by C{gnt-instance list} is compared with
  C{constants.INSTST_RUNNING}.

  @param instance: Instance entry; its C{"name"} value is used on the
    command lines

  """
  # Reboot types are read from the "options" section of the QA
  # configuration; without an override all of constants.REBOOT_TYPES are used
  qa_options = qa_config.get("options", {})
  wanted_types = qa_options.get("reboot-types", constants.REBOOT_TYPES)
  name = instance["name"]
  for reboot_type in wanted_types:
    type_option = "--type=%s" % reboot_type
    AssertCommand(["gnt-instance", "reboot", type_option, name])

  # Rebooting an instance that has been shut down
  AssertCommand(["gnt-instance", "shutdown", name])
  qa_utils.RunInstanceCheck(instance, False)
  AssertCommand(["gnt-instance", "reboot", name])

  master = qa_config.GetMasterNode()
  status_cmd = ["gnt-instance", "list", "--no-headers", "-o", "status", name]
  raw_status = qa_utils.GetCommandOutput(master["primary"],
                                         utils.ShellQuoteArgs(status_cmd))
  AssertEqual(raw_status.strip(), constants.INSTST_RUNNING)
@InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
def TestInstanceReinstall(instance):
  """gnt-instance reinstall

  @param instance: Instance entry; its C{"name"} value names the instance
    that is reinstalled (with C{-f})

  """
  target = instance["name"]
  AssertCommand(["gnt-instance", "reinstall", "-f", target])
def _ReadSsconfInstanceList():
  """Reads ssconf_instance_list from the master node.

  The file C{ssconf_<SS_INSTANCE_LIST>} below C{constants.DATA_DIR} is read
  with C{cat} on the master node.

  @return: The command output split into lines

  """
  master = qa_config.GetMasterNode()

  ssconf_path = utils.PathJoin(constants.DATA_DIR,
                               "ssconf_%s" % constants.SS_INSTANCE_LIST)
  cat_cmd = ["cat", ssconf_path]

  raw_output = qa_utils.GetCommandOutput(master["primary"],
                                         utils.ShellQuoteArgs(cat_cmd))
  return raw_output.splitlines()
def _CheckSsconfInstanceList(instance):
  """Checks if a certain instance is in the ssconf instance list.

  The name is passed through C{qa_utils.ResolveInstanceName} before it is
  looked up in the list returned by L{_ReadSsconfInstanceList}.

  @type instance: string
  @param instance: Instance name

  """
  resolved_name = qa_utils.ResolveInstanceName(instance)
  known_instances = _ReadSsconfInstanceList()
  AssertIn(resolved_name, known_instances)
# Renames an instance from rename_source to rename_target and back again,
# checking the ssconf instance list after each step. Before the real rename,
# a rename is attempted while /etc/hosts holds an entry made from
# "meeeeh-not-exists" and rename_target; per the comment below that attempt
# is expected to fail.
# NOTE(review): the embedded numbering skips 210, 212-214, 216, 219, 221,
# 223, 225 and 231-232. The end of the docstring, the remaining arguments of
# the first rename call and any statements around the hosts-file handling
# are not visible in this extract.
207 @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
208 def TestInstanceRenameAndBack(rename_source, rename_target):
209 """gnt-instance rename
211 This must leave the instance with the original name, not the target
215 _CheckSsconfInstanceList(rename_source)
217 # first do a rename to a different actual name, expecting it to fail
218 qa_utils.AddToEtcHosts(["meeeeh-not-exists", rename_target])
220 AssertCommand(["gnt-instance", "rename", rename_source, rename_target],
222 _CheckSsconfInstanceList(rename_source)
224 qa_utils.RemoveFromEtcHosts(["meeeeh-not-exists", rename_target])
226 # and now rename instance to rename_target...
227 AssertCommand(["gnt-instance", "rename", rename_source, rename_target])
228 _CheckSsconfInstanceList(rename_target)
229 qa_utils.RunInstanceCheck(rename_source, False)
230 qa_utils.RunInstanceCheck(rename_target, False)
# Rename back to the original name and re-check the ssconf list
233 AssertCommand(["gnt-instance", "rename", rename_target, rename_source])
234 _CheckSsconfInstanceList(rename_source)
235 qa_utils.RunInstanceCheck(rename_target, False)
# Builds a forced "gnt-instance failover" command for the instance and runs
# an instance check with True as second argument.
# NOTE(review): the embedded numbering skips 242-244 and 246-250; the
# statements that actually execute `cmd` are not visible in this extract.
238 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
239 def TestInstanceFailover(instance):
240 """gnt-instance failover"""
241 cmd = ["gnt-instance", "failover", "--force", instance["name"]]
245 qa_utils.RunInstanceCheck(instance, True)
# Exercises "gnt-instance migrate": with the instance shut down the plain
# forced migration must fail, a migration with "--allow-failover" is then
# attempted, the instance is started again, and the BE_ALWAYS_FAILOVER
# backend parameter is switched to VALUE_TRUE and later to VALUE_FALSE via
# "gnt-instance modify -B".
# NOTE(review): the embedded numbering skips many lines (255-257, 259-262,
# 268, 270, 272, 274, 276-278, 281, 284, 286-288); several calls are cut off
# mid-argument-list and the first executions of `cmd` are not visible here.
251 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
252 def TestInstanceMigrate(instance):
253 """gnt-instance migrate"""
254 cmd = ["gnt-instance", "migrate", "--force", instance["name"]]
258 qa_utils.RunInstanceCheck(instance, True)
263 # TODO: Split into multiple tests
264 AssertCommand(["gnt-instance", "shutdown", instance["name"]])
265 qa_utils.RunInstanceCheck(instance, False)
# With the instance shut down, the plain forced migration is expected to fail
266 AssertCommand(cmd, fail=True)
267 AssertCommand(["gnt-instance", "migrate", "--force", "--allow-failover",
269 AssertCommand(["gnt-instance", "start", instance["name"]])
271 qa_utils.RunInstanceCheck(instance, True)
273 AssertCommand(["gnt-instance", "modify", "-B",
275 (constants.BE_ALWAYS_FAILOVER, constants.VALUE_TRUE)),
279 qa_utils.RunInstanceCheck(instance, True)
280 # TODO: Verify that a failover has been done instead of a migration
282 # TODO: Verify whether the default value is restored here (not hardcoded)
283 AssertCommand(["gnt-instance", "modify", "-B",
285 (constants.BE_ALWAYS_FAILOVER, constants.VALUE_FALSE)),
289 qa_utils.RunInstanceCheck(instance, True)
def TestInstanceInfo(instance):
  """gnt-instance info

  @param instance: Instance entry; its C{"name"} value names the instance
    whose information is requested

  """
  info_cmd = ["gnt-instance", "info"]
  info_cmd.append(instance["name"])
  AssertCommand(info_cmd)
# Runs "gnt-instance modify" with a series of backend (-B) and hypervisor
# (-H) parameter changes: memory limits, VCPU count, always-failover and the
# kernel path, plus initrd-path changes when the default hypervisor is
# HT_XEN_PVM or boot-order changes when it is HT_XEN_HVM. Finally a modify
# call without options and the "--online"/"--offline" switches are all
# expected to fail.
# NOTE(review): the embedded numbering skips 301, 305, 309-310, 320,
# 323-324, 327-328, 330, 334, 336, 339-341, 343-344 and 346; the statements
# that open and close the argument lists and the loop header that binds
# `alist` are not visible in this extract.
297 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
298 def TestInstanceModify(instance):
299 """gnt-instance modify"""
300 default_hv = qa_config.GetDefaultHypervisor()
302 # Assume /sbin/init exists on all systems
303 test_kernel = "/sbin/init"
304 test_initrd = test_kernel
306 orig_maxmem = qa_config.get(constants.BE_MAXMEM)
307 orig_minmem = qa_config.get(constants.BE_MINMEM)
308 #orig_bridge = qa_config.get("bridge", "xen-br0")
311 ["-B", "%s=128" % constants.BE_MINMEM],
312 ["-B", "%s=128" % constants.BE_MAXMEM],
313 ["-B", "%s=%s,%s=%s" % (constants.BE_MINMEM, orig_minmem,
314 constants.BE_MAXMEM, orig_maxmem)],
315 ["-B", "%s=2" % constants.BE_VCPUS],
316 ["-B", "%s=1" % constants.BE_VCPUS],
317 ["-B", "%s=%s" % (constants.BE_VCPUS, constants.VALUE_DEFAULT)],
318 ["-B", "%s=%s" % (constants.BE_ALWAYS_FAILOVER, constants.VALUE_TRUE)],
319 ["-B", "%s=%s" % (constants.BE_ALWAYS_FAILOVER, constants.VALUE_DEFAULT)],
321 ["-H", "%s=%s" % (constants.HV_KERNEL_PATH, test_kernel)],
322 ["-H", "%s=%s" % (constants.HV_KERNEL_PATH, constants.VALUE_DEFAULT)],
325 #["--bridge", "xen-br1"],
326 #["--bridge", orig_bridge],
329 if default_hv == constants.HT_XEN_PVM:
331 ["-H", "%s=%s" % (constants.HV_INITRD_PATH, test_initrd)],
332 ["-H", "no_%s" % (constants.HV_INITRD_PATH, )],
333 ["-H", "%s=%s" % (constants.HV_INITRD_PATH, constants.VALUE_DEFAULT)],
335 elif default_hv == constants.HT_XEN_HVM:
337 ["-H", "%s=acn" % constants.HV_BOOT_ORDER],
338 ["-H", "%s=%s" % (constants.HV_BOOT_ORDER, constants.VALUE_DEFAULT)],
342 AssertCommand(["gnt-instance", "modify"] + alist + [instance["name"]])
# A modify call without any change option is expected to fail
345 AssertCommand(["gnt-instance", "modify", instance["name"]], fail=True)
347 # Marking offline/online while instance is running must fail
348 for arg in ["--online", "--offline"]:
349 AssertCommand(["gnt-instance", "modify", arg, instance["name"]], fail=True)
@InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
def TestInstanceStoppedModify(instance):
  """gnt-instance modify (stopped instance)

  Runs C{gnt-instance modify} three times, with C{--online}, C{--offline}
  and C{--online} again; every call is expected to succeed.

  @param instance: Instance entry; its C{"name"} value is used on the
    command lines

  """
  name = instance["name"]

  # The instance was not marked offline before, so the first "--online" is a
  # repeat; it is then marked offline and finally brought back online
  for state_flag in ("--online", "--offline", "--online"):
    AssertCommand(["gnt-instance", "modify", state_flag, name])
@InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
def TestInstanceConvertDisk(instance, snode):
  """gnt-instance modify -t

  Converts the instance to the C{plain} disk template and then to C{drbd},
  passing the C{"primary"} value of C{snode} via C{-n}.

  @param instance: Instance entry; its C{"name"} value is used on the
    command lines
  @param snode: Node entry whose C{"primary"} value follows C{-n}

  """
  name = instance["name"]
  modify_template = ["gnt-instance", "modify", "-t"]

  AssertCommand(modify_template + ["plain", name])
  AssertCommand(modify_template + ["drbd", "-n", snode["primary"], name])
# For every configured disk: grows the disk by the configured amount, then
# attempts an absolute grow (per the comment below, to the old size, which
# is expected to fail) and finally grows absolutely to the original size
# plus twice the growth amount. Sizes are parsed with utils.ParseUnit.
# NOTE(review): the embedded numbering skips 382 and 391; the condition that
# guards the "128M" default for `all_grow` and the remaining arguments of the
# first "--absolute" call are not visible in this extract.
376 @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
377 def TestInstanceGrowDisk(instance):
378 """gnt-instance grow-disk"""
379 name = instance["name"]
380 all_size = qa_config.get("disk")
381 all_grow = qa_config.get("disk-growth")
383 # missing disk sizes but instance grow disk has been enabled,
384 # let's set fixed/nomimal growth
385 all_grow = ["128M" for _ in all_size]
386 for idx, (size, grow) in enumerate(zip(all_size, all_grow)):
387 # succeed in grow by amount
388 AssertCommand(["gnt-instance", "grow-disk", name, str(idx), grow])
389 # fail in grow to the old size
390 AssertCommand(["gnt-instance", "grow-disk", "--absolute", name, str(idx),
392 # succeed to grow to old size + 2 * growth
393 int_size = utils.ParseUnit(size)
394 int_grow = utils.ParseUnit(grow)
395 AssertCommand(["gnt-instance", "grow-disk", "--absolute", name, str(idx),
396 str(int_size + 2 * int_grow)])
def TestInstanceList():
  """gnt-instance list

  Hands the keys of C{query.INSTANCE_FIELDS} to
  C{qa_utils.GenericQueryTest} for the C{gnt-instance} command.

  """
  field_names = query.INSTANCE_FIELDS.keys()
  qa_utils.GenericQueryTest("gnt-instance", field_names)
def TestInstanceListFields():
  """gnt-instance list-fields

  Hands the keys of C{query.INSTANCE_FIELDS} to
  C{qa_utils.GenericQueryFieldsTest} for the C{gnt-instance} command.

  """
  field_names = query.INSTANCE_FIELDS.keys()
  qa_utils.GenericQueryFieldsTest("gnt-instance", field_names)
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestInstanceConsole(instance):
  """gnt-instance console

  The command is run with C{--show-cmd}.

  @param instance: Instance entry; its C{"name"} value is used on the
    command line

  """
  console_cmd = ["gnt-instance", "console", "--show-cmd"]
  console_cmd.append(instance["name"])
  AssertCommand(console_cmd)
# Exercises "gnt-instance replace-disks": a set of option lists (including
# "--new-secondary" for `othernode` and then for `snode`) is run through
# `buildcmd`, followed by "-a" on the running instance, "-a" on the stopped
# instance (expected to fail), disk activation, "-a" once more and a final
# instance start.
# NOTE(review): the embedded numbering skips 421, 423, 425-429, 431, 433,
# 435 and 441; the definition of `buildcmd`, the loop that binds `data`, the
# other option lists and the tail of the "--wait-for-sync" call are not
# visible in this extract.
415 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
416 def TestReplaceDisks(instance, pnode, snode, othernode):
417 """gnt-instance replace-disks"""
418 # pylint: disable=W0613
419 # due to unused pnode arg
420 # FIXME: should be removed from the function completely
422 cmd = ["gnt-instance", "replace-disks"]
424 cmd.append(instance["name"])
430 ["--new-secondary=%s" % othernode["primary"]],
432 ["--new-secondary=%s" % snode["primary"]],
434 AssertCommand(buildcmd(data))
436 AssertCommand(buildcmd(["-a"]))
437 AssertCommand(["gnt-instance", "stop", instance["name"]])
# With the instance stopped, "-a" is expected to fail until disks are activated
438 AssertCommand(buildcmd(["-a"]), fail=True)
439 AssertCommand(["gnt-instance", "activate-disks", instance["name"]])
440 AssertCommand(["gnt-instance", "activate-disks", "--wait-for-sync",
442 AssertCommand(buildcmd(["-a"]))
443 AssertCommand(["gnt-instance", "start", instance["name"]])
# Runs "gnt-instance recreate-disks <cmdargs> <instance name>" and, when the
# command is not expected to fail and `check` is set, activates and then
# deactivates the instance's disks as a quick sanity check.
# NOTE(review): the embedded numbering skips 447, 449, 455-457 and 465; the
# end of the signature (the docstring documents a `destroy` parameter), the
# end of the docstring, whatever guards the _DestroyInstanceVolumes call and
# the tail of the "--wait-for-sync" call are not visible in this extract.
446 def _AssertRecreateDisks(cmdargs, instance, fail=False, check=True,
448 """Execute gnt-instance recreate-disks and check the result
450 @param cmdargs: Arguments (instance name excluded)
451 @param instance: Instance to operate on
452 @param fail: True if the command is expected to fail
453 @param check: If True and fail is False, check that the disks work
454 @prama destroy: If True, destroy the old disks first
458 _DestroyInstanceVolumes(instance)
# `fail` is handed to AssertCommand positionally, as its second argument
459 AssertCommand((["gnt-instance", "recreate-disks"] + cmdargs +
460 [instance["name"]]), fail)
461 if not fail and check:
462 # Quick check that the disks are there
463 AssertCommand(["gnt-instance", "activate-disks", instance["name"]])
464 AssertCommand(["gnt-instance", "activate-disks", "--wait-for-sync",
466 AssertCommand(["gnt-instance", "deactivate-disks", instance["name"]])
# Exercises "gnt-instance recreate-disks" through _AssertRecreateDisks:
# first two attempts that must fail because the instance is running, then
# (after stopping it) one that must fail because the disks still exist,
# followed by recreation in place, with "-I hail" and with "-n <orig_seq>".
# The instance is finally reinstalled and started.
# NOTE(review): the embedded numbering skips 472, 477-478, 481, 491 and 493;
# the end of the docstring and the line preceding the `snode` use (the
# docstring says snode may be None) are not visible in this extract.
469 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
470 def TestRecreateDisks(instance, pnode, snode, othernodes):
471 """gnt-instance recreate-disks
473 @param instance: Instance to work on
474 @param pnode: Primary node
475 @param snode: Secondary node, or None for sigle-homed instances
476 @param othernodes: list/tuple of nodes where to temporarily recreate disks
479 other_seq = ":".join([n["primary"] for n in othernodes])
480 orig_seq = pnode["primary"]
482 orig_seq = orig_seq + ":" + snode["primary"]
483 # These fail because the instance is running
484 _AssertRecreateDisks(["-n", other_seq], instance, fail=True, destroy=False)
485 _AssertRecreateDisks(["-I", "hail"], instance, fail=True, destroy=False)
486 AssertCommand(["gnt-instance", "stop", instance["name"]])
487 # Disks exist: this should fail
488 _AssertRecreateDisks([], instance, fail=True, destroy=False)
489 # Recreate disks in place
490 _AssertRecreateDisks([], instance)
492 _AssertRecreateDisks(["-I", "hail"], instance)
494 _AssertRecreateDisks(["-n", orig_seq], instance, check=False)
495 # This and InstanceCheck decoration check that the disks are working
496 AssertCommand(["gnt-instance", "reinstall", "-f", instance["name"]])
497 AssertCommand(["gnt-instance", "start", instance["name"]])
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestInstanceExport(instance, node):
  """gnt-backup export -n ...

  @param instance: Instance entry; its C{"name"} value names the exported
    instance
  @param node: Node entry whose C{"primary"} value follows C{-n}
  @return: The instance name as returned by
    C{qa_utils.ResolveInstanceName}

  """
  name = instance["name"]
  export_cmd = ["gnt-backup", "export", "-n", node["primary"], name]
  AssertCommand(export_cmd)
  return qa_utils.ResolveInstanceName(name)
@InstanceCheck(None, INST_DOWN, FIRST_ARG)
def TestInstanceExportWithRemove(instance, node):
  """gnt-backup export --remove-instance

  @param instance: Instance entry; its C{"name"} value names the exported
    instance
  @param node: Node entry whose C{"primary"} value follows C{-n}

  """
  target_node = node["primary"]
  inst_name = instance["name"]
  AssertCommand(["gnt-backup", "export", "-n", target_node,
                 "--remove-instance", inst_name])
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestInstanceExportNoTarget(instance):
  """gnt-backup export (without target node, should fail)

  @param instance: Instance entry; its C{"name"} value is the only argument
    given to the export command

  """
  export_cmd = ["gnt-backup", "export", instance["name"]]
  AssertCommand(export_cmd, fail=True)
# Assembles a "gnt-backup import" command: plain disk template, source node
# and source directory ("<EXPORT_DIR>/<name>"), target node, and the generic
# add parameters with the MAC forced to constants.VALUE_GENERATE; the new
# instance's name is appended last.
# NOTE(review): the embedded numbering skips 526 and 532+; one element of
# the command list and the statement that executes `cmd` are not visible in
# this extract.
521 @InstanceCheck(None, INST_DOWN, FIRST_ARG)
522 def TestInstanceImport(newinst, node, expnode, name):
523 """gnt-backup import"""
524 cmd = (["gnt-backup", "import",
525 "--disk-template=plain",
527 "--src-node=%s" % expnode["primary"],
528 "--src-dir=%s/%s" % (constants.EXPORT_DIR, name),
529 "--node=%s" % node["primary"]] +
530 _GetGenericAddParameters(newinst, force_mac=constants.VALUE_GENERATE))
531 cmd.append(newinst["name"])
def TestBackupList(expnode):
  """gnt-backup list

  Lists the backups of one node and then runs the generic query test for
  C{gnt-backup} with the keys of C{query.EXPORT_FIELDS}.

  @param expnode: Node entry whose C{"primary"} value is used for the
    C{--node} option

  """
  node_option = "--node=%s" % expnode["primary"]
  AssertCommand(["gnt-backup", "list", node_option])

  export_fields = query.EXPORT_FIELDS.keys()
  qa_utils.GenericQueryTest("gnt-backup", export_fields,
                            namefield=None, test_unknown=False)
def TestBackupListFields():
  """gnt-backup list-fields

  Hands the keys of C{query.EXPORT_FIELDS} to
  C{qa_utils.GenericQueryFieldsTest} for the C{gnt-backup} command.

  """
  export_fields = query.EXPORT_FIELDS.keys()
  qa_utils.GenericQueryFieldsTest("gnt-backup", export_fields)
# Simulates a disk failure for an instance: looks up the physical disks the
# instance uses on both nodes ("gnt-node volumes"), checks that each disk
# has a sysfs state file, collects device paths ("gnt-instance
# activate-disks") and drbd devices ("gnt-instance info"), writes "offline"
# to the state files of the selected node's disks, writes to the devices
# with dd, prints debugging information, writes "running" back to the state
# files, detaches/disconnects the drbd devices, runs replace-disks, restarts
# the instance and finally runs "gnt-cluster verify".
# `onmaster` is used as a list index: int(onmaster) == 0 selects node2 (the
# "secondary"), 1 selects node (the "master").
# NOTE(review): the embedded numbering skips many lines (e.g. 565, 571, 573,
# 575-576, 585, 587-588, 595, 599-600, 607-610, 612, 617, 619-621, 624,
# 626-627, 631, 633-634, 636, 640-641, 644, 647-648); the initialisation of
# `node2disk`, `cmds`, `devpath` and `halted_disks`, the assignment of
# `name`, several loop headers/conditions and the tails of some print
# statements are not visible in this extract.
548 def _TestInstanceDiskFailure(instance, node, node2, onmaster):
549 """Testing disk failure."""
550 master = qa_config.GetMasterNode()
551 sq = utils.ShellQuoteArgs
553 instance_full = qa_utils.ResolveInstanceName(instance["name"])
554 node_full = qa_utils.ResolveNodeName(node)
555 node2_full = qa_utils.ResolveNodeName(node2)
557 print qa_utils.FormatInfo("Getting physical disk names")
558 cmd = ["gnt-node", "volumes", "--separator=|", "--no-headers",
559 "--output=node,phys,instance",
560 node["primary"], node2["primary"]]
561 output = qa_utils.GetCommandOutput(master["primary"], sq(cmd))
563 # Get physical disk names
564 re_disk = re.compile(r"^/dev/([a-z]+)\d+$")
566 for line in output.splitlines():
567 (node_name, phys, inst) = line.split("|")
568 if inst == instance_full:
569 if node_name not in node2disk:
570 node2disk[node_name] = []
572 m = re_disk.match(phys)
574 raise qa_error.Error("Unknown disk name format: %s" % phys)
577 if name not in node2disk[node_name]:
578 node2disk[node_name].append(name)
# The node under test must have at least one physical disk recorded
580 if [node2_full, node_full][int(onmaster)] not in node2disk:
581 raise qa_error.Error("Couldn't find physical disks used on"
582 " %s node" % ["secondary", "master"][int(onmaster)])
584 print qa_utils.FormatInfo("Checking whether nodes have ability to stop"
586 for node_name, disks in node2disk.iteritems():
589 cmds.append(sq(["test", "-f", _GetDiskStatePath(disk)]))
590 AssertCommand(" && ".join(cmds), node=node_name)
592 print qa_utils.FormatInfo("Getting device paths")
593 cmd = ["gnt-instance", "activate-disks", instance["name"]]
594 output = qa_utils.GetCommandOutput(master["primary"], sq(cmd))
596 for line in output.splitlines():
597 (_, _, tmpdevpath) = line.split(":")
598 devpath.append(tmpdevpath)
601 print qa_utils.FormatInfo("Getting drbd device paths")
602 cmd = ["gnt-instance", "info", instance["name"]]
603 output = qa_utils.GetCommandOutput(master["primary"], sq(cmd))
# Multi-line pattern (used with re.M) capturing the /dev/drbdN path given
# after "primary:" following a disk entry of type drbd/drbd8
604 pattern = (r"\s+-\s+sd[a-z]+,\s+type:\s+drbd8?,\s+.*$"
605 r"\s+primary:\s+(/dev/drbd\d+)\s+")
606 drbddevs = re.findall(pattern, output, re.M)
611 print qa_utils.FormatInfo("Deactivating disks")
613 for name in node2disk[[node2_full, node_full][int(onmaster)]]:
614 halted_disks.append(name)
615 cmds.append(sq(["echo", "offline"]) + " >%s" % _GetDiskStatePath(name))
616 AssertCommand(" && ".join(cmds), node=[node2, node][int(onmaster)])
618 print qa_utils.FormatInfo("Write to disks and give some time to notice"
622 cmds.append(sq(["dd", "count=1", "bs=512", "conv=notrunc",
623 "if=%s" % disk, "of=%s" % disk]))
625 AssertCommand(" && ".join(cmds), node=node)
628 print qa_utils.FormatInfo("Debugging info")
629 for name in drbddevs:
630 AssertCommand(["drbdsetup", name, "show"], node=node)
632 AssertCommand(["gnt-instance", "info", instance["name"]])
635 print qa_utils.FormatInfo("Activating disks again")
637 for name in halted_disks:
638 cmds.append(sq(["echo", "running"]) + " >%s" % _GetDiskStatePath(name))
# Unlike the commands above, these are joined with "; " rather than " && "
639 AssertCommand("; ".join(cmds), node=[node2, node][int(onmaster)])
642 for name in drbddevs:
643 AssertCommand(["drbdsetup", name, "detach"], node=node)
645 for name in drbddevs:
646 AssertCommand(["drbdsetup", name, "disconnect"], node=node2)
649 #AssertCommand(["vgs"], [node2, node][int(onmaster)])
651 print qa_utils.FormatInfo("Making sure disks are up again")
652 AssertCommand(["gnt-instance", "replace-disks", instance["name"]])
654 print qa_utils.FormatInfo("Restarting instance")
655 AssertCommand(["gnt-instance", "shutdown", instance["name"]])
656 AssertCommand(["gnt-instance", "startup", instance["name"]])
658 AssertCommand(["gnt-cluster", "verify"])
def TestInstanceMasterDiskFailure(instance, node, node2):
  """Testing disk failure on master node.

  The test itself is disabled; only a formatted error message explaining
  this is printed. All arguments are currently unused.

  """
  # pylint: disable=W0613
  notice = ("Disk failure on primary node cannot be"
            " tested due to potential crashes.")
  # A single parenthesised argument prints the same text whether "print" is
  # the statement or the function
  print(qa_utils.FormatError(notice))
  # The following can cause crashes, thus it's disabled until fixed
  #return _TestInstanceDiskFailure(instance, node, node2, True)
def TestInstanceSecondaryDiskFailure(instance, node, node2):
  """Testing disk failure on secondary node.

  Delegates to L{_TestInstanceDiskFailure} with C{onmaster} set to False.

  @return: The result of L{_TestInstanceDiskFailure}

  """
  on_master = False
  return _TestInstanceDiskFailure(instance, node, node2, on_master)