4 # Copyright (C) 2007, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Cluster related QA tests.
30 from ganeti import constants
31 from ganeti import compat
32 from ganeti import utils
33 from ganeti import pathutils
39 from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
42 # Prefix for LVM volumes created by QA code during tests
45 #: cluster verify command
46 _CLUSTER_VERIFY = ["gnt-cluster", "verify"]
def _RemoveFileFromAllNodes(filename):
  """Delete a file on every node listed in the QA configuration.

  @param filename: Path handed to C{rm -f} on each node

  """
  rm_args = ("rm", "-f", filename)
  for target in qa_config.get("nodes"):
    # A fresh list per call, exactly as a literal inside the loop would give
    AssertCommand(list(rm_args), node=target)
def _CheckFileOnAllNodes(filename, content):
  """Check that a file has the expected content on every configured node.

  @param filename: Path of the file that is read with C{cat}
  @param content: Text the command output is compared against

  """
  read_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    found = qa_utils.GetCommandOutput(target["primary"], read_cmd)
    AssertEqual(found, content)
66 # "gnt-cluster info" fields
67 _CIFIELD_RE = re.compile(r"^[-\s]*(?P<field>[^\s:]+):\s*(?P<value>\S.*)$")
def _GetBoolClusterField(field):
  """Get the Boolean value of a cluster field.

  This function currently assumes that the field name is unique in the cluster
  configuration. An assertion checks this assumption.

  @param field: Name of the field
  @return: The effective value of the field; C{True} if the value printed by
    C{gnt-cluster info} equals "true" (case-insensitive), C{False} otherwise
  @raise qa_error.Error: If no line of the output carries the field

  """
  master = qa_config.GetMasterNode()
  infocmd = "gnt-cluster info"
  info_out = qa_utils.GetCommandOutput(master["primary"], infocmd)

  # NOTE(review): the initialisation, the uniqueness assertion and the return
  # statement were not visible in the reviewed source; they are restored from
  # the docstring and the surrounding comments -- confirm
  ret = None
  for line in info_out.splitlines():
    m = _CIFIELD_RE.match(line)
    # FIXME: There should be a way to specify a field through a hierarchy
    if m and m.group("field") == field:
      # Make sure that ignoring the hierarchy doesn't cause a double match
      assert ret is None
      ret = (m.group("value").lower() == "true")

  if ret is not None:
    return ret

  raise qa_error.Error("Field not found in cluster configuration: %s" % field)
98 # Cluster-verify errors (date, "ERROR", then error code)
99 _CVERROR_RE = re.compile(r"^[\w\s:]+\s+- ERROR:([A-Z0-9_-]+):")
102 def _GetCVErrorCodes(cvout):
104 for l in cvout.splitlines():
105 m = _CVERROR_RE.match(l)
def AssertClusterVerify(fail=False, errors=None):
  """Run cluster-verify and check the result

  @param fail: if cluster-verify is expected to fail instead of succeeding
  @type errors: list of tuples
  @param errors: List of CV_XXX errors that are expected; if specified, all the
    errors listed must appear in cluster-verify output. A non-empty value
    implies C{fail=True}.
  @raise qa_error.Error: If an expected error code is missing from the output

  """
  cvcmd = "gnt-cluster verify"
  mnode = qa_config.GetMasterNode()

  # NOTE(review): the branch structure and the last argument of the
  # GetCommandOutput call were not visible in the reviewed source; both are
  # restored from the docstring ("a non-empty value implies fail=True") --
  # confirm
  if errors:
    cvout = GetCommandOutput(mnode["primary"], cvcmd + " --error-codes",
                             fail=True)
    actual = _GetCVErrorCodes(cvout)
    # The error code is the middle element of each CV_XXX tuple
    expected = compat.UniqueFrozenset(e for (_, e, _) in errors)
    if not actual.issuperset(expected):
      missing = expected.difference(actual)
      raise qa_error.Error("Cluster-verify didn't return these expected"
                           " errors: %s" % utils.CommaJoin(missing))
  else:
    AssertCommand(cvcmd, fail=fail, node=mnode)
138 # data for testing failures due to bad keys/values for disk parameters
139 _FAIL_PARAMS = ["nonexistent:resync-rate=1",
140 "drbd:nonexistent=1",
141 "drbd:resync-rate=invalid",
def TestClusterInitDisk():
  """gnt-cluster init -D

  Every entry of C{_FAIL_PARAMS} is passed as disk parameter and the
  initialisation is expected to fail.

  """
  cluster_name = qa_config.get("name")
  for bad_param in _FAIL_PARAMS:
    init_cmd = ["gnt-cluster", "init", "-D", bad_param, cluster_name]
    AssertCommand(init_cmd, fail=True)
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs the RAPI credentials on the master node, initialises the cluster
  with the options found in the QA configuration and finally applies the
  configured hypervisor, backend and OS parameters.

  @param rapi_user: User name written to the RAPI users file
  @param rapi_secret: Password written to the RAPI users file

  """
  # NOTE(review): several structural lines (try/finally, the guards on
  # optional settings, the command list opener and the AssertCommand(cmd)
  # calls) were not visible in the reviewed source and are restored from
  # context -- confirm against the original file
  master = qa_config.GetMasterNode()

  rapi_dir = os.path.dirname(pathutils.RAPI_USERS_FILE)

  # First create the RAPI credentials
  fh = tempfile.NamedTemporaryFile()
  try:
    fh.write("%s %s write\n" % (rapi_user, rapi_secret))
    # The file is uploaded by name, hence the data must be on disk
    fh.flush()

    tmpru = qa_utils.UploadFile(master["primary"], fh.name)
    try:
      AssertCommand(["mkdir", "-p", rapi_dir])
      AssertCommand(["mv", tmpru, pathutils.RAPI_USERS_FILE])
    finally:
      AssertCommand(["rm", "-f", tmpru])
  finally:
    fh.close()

  # Initialize cluster
  cmd = [
    "gnt-cluster", "init",
    "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4),
    "--enabled-hypervisors=%s" % ",".join(qa_config.GetEnabledHypervisors()),
    ]

  # NOTE(review): the tail of this tuple was not visible; "nic-count" is an
  # assumption -- confirm
  for spec_type in ("mem-size", "disk-size", "disk-count", "cpu-count",
                    "nic-count"):
    for spec_val in ("min", "max", "std"):
      spec = qa_config.get("ispec_%s_%s" %
                           (spec_type.replace("-", "_"), spec_val), None)
      if spec:
        cmd.append("--specs-%s=%s=%d" % (spec_type, spec_val, spec))

  if master.get("secondary", None):
    cmd.append("--secondary-ip=%s" % master["secondary"])

  vgname = qa_config.get("vg-name", None)
  if vgname:
    cmd.append("--vg-name=%s" % vgname)

  master_netdev = qa_config.get("master-netdev", None)
  if master_netdev:
    cmd.append("--master-netdev=%s" % master_netdev)

  nicparams = qa_config.get("default-nicparams", None)
  if nicparams:
    cmd.append("--nic-parameters=%s" %
               ",".join(utils.FormatKeyValue(nicparams)))

  cmd.append(qa_config.get("name"))
  AssertCommand(cmd)

  cmd = ["gnt-cluster", "modify"]

  # hypervisor parameter modifications
  hvp = qa_config.get("hypervisor-parameters", {})
  for k, v in hvp.items():
    cmd.extend(["-H", "%s:%s" % (k, v)])
  # backend parameter modifications
  bep = qa_config.get("backend-parameters", "")
  if bep:
    cmd.extend(["-B", bep])

  # "gnt-cluster modify" without options is an error, run it only when at
  # least one modification was collected
  if len(cmd) > 2:
    AssertCommand(cmd)

  # OS parameters
  osp = qa_config.get("os-parameters", {})
  for k, v in osp.items():
    AssertCommand(["gnt-os", "modify", "-O", v, k])

  # OS hypervisor parameters
  os_hvp = qa_config.get("os-hvp", {})
  for os_name in os_hvp:
    for hv, hvp in os_hvp[os_name].items():
      AssertCommand(["gnt-os", "modify", "-H", "%s:%s" % (hv, hvp), os_name])
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the configured "rename" target and back to its
  original name. Nothing is run if the configuration has no "rename" entry.

  """
  cmd = ["gnt-cluster", "rename", "-f"]

  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  # NOTE(review): the loop header and the list entries following each rename
  # were not visible in the reviewed source; running a cluster verification
  # after each rename is an assumption -- confirm
  for data in [
    cmd + [rename_target],
    _CLUSTER_VERIFY,
    cmd + [original_name],
    _CLUSTER_VERIFY,
    ]:
    AssertCommand(data)
def TestClusterOob():
  """out-of-band framework

  Points the cluster-wide C{oob_program} node parameter at a missing file,
  at a non-executable file and at an executable file, running cluster-verify
  after each step, and finally resets the parameter.

  """
  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

  AssertCommand(_CLUSTER_VERIFY)
  # NOTE(review): the right-hand operand of this format was not visible in
  # the reviewed source; a fresh UUID is assumed, as for oob_path_exists
  AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                 "oob_program=/tmp/ganeti-qa-oob-does-not-exist-%s" %
                 utils.NewUUID()])

  AssertCommand(_CLUSTER_VERIFY, fail=True)

  AssertCommand(["touch", oob_path_exists])
  AssertCommand(["chmod", "0400", oob_path_exists])
  AssertCommand(["gnt-cluster", "copyfile", oob_path_exists])

  # NOTE(review): try/finally restored from context so that the helper file
  # is removed even when one of the checks fails -- confirm
  try:
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program=%s" % oob_path_exists])

    # Mode 0400: the program exists but is not executable
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    AssertCommand(["chmod", "0500", oob_path_exists])
    AssertCommand(["gnt-cluster", "copyfile", oob_path_exists])

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])

  # NOTE(review): the parameter value of this call was not visible; resetting
  # oob_program to an empty value is an assumption -- confirm
  AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                 "oob_program="])
def TestClusterEpo():
  """gnt-cluster epo

  Checks invalid option combinations, powers everything off with
  C{--all}, verifies the instance status, powers everything on again and
  verifies the status once more.

  """
  master = qa_config.GetMasterNode()

  # Assert that OOB is unavailable for all nodes
  # NOTE(review): the end of this command string was not visible in the
  # reviewed source; the "powered" field is assumed from the check below
  result_output = GetCommandOutput(master["primary"],
                                   "gnt-node list --verbose --no-headers -o"
                                   " powered")
  AssertEqual(compat.all(powered == "(unavail)"
                         for powered in result_output.splitlines()), True)

  # --groups together with --all is expected to be rejected
  AssertCommand(["gnt-cluster", "epo", "--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(["gnt-cluster", "epo", "--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(["gnt-cluster", "epo", "-f", master["primary"]], fail=True)

  # This shouldn't fail
  AssertCommand(["gnt-cluster", "epo", "-f", "--all"])

  # All instances should have been stopped now
  result_output = GetCommandOutput(master["primary"],
                                   "gnt-instance list --no-headers -o status")
  # ERROR_down because the instance is stopped but not recorded as such
  AssertEqual(compat.all(status == "ERROR_down"
                         for status in result_output.splitlines()), True)

  # Now start everything again
  AssertCommand(["gnt-cluster", "epo", "--on", "-f", "--all"])

  # All instances should have been started now
  result_output = GetCommandOutput(master["primary"],
                                   "gnt-instance list --no-headers -o status")
  AssertEqual(compat.all(status == "running"
                         for status in result_output.splitlines()), True)
def TestClusterVerify():
  """gnt-cluster verify

  Runs the cluster verification followed by C{gnt-cluster verify-disks}.

  """
  for verify_cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(verify_cmd)
330 """gnt-debug test-jobqueue"""
331 AssertCommand(["gnt-debug", "test-jobqueue"])
335 """gnt-debug delay"""
336 AssertCommand(["gnt-debug", "delay", "1"])
337 AssertCommand(["gnt-debug", "delay", "--no-master", "1"])
338 AssertCommand(["gnt-debug", "delay", "--no-master",
339 "-n", node["primary"], "1"])
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates a logical volume outside of Ganeti's control and checks that
  cluster-verify fails or passes depending on whether the volume is covered
  by the C{--reserved-lvs} setting.

  """
  vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
  lvname = _QA_LV_PREFIX + "test"
  lvfullname = "/".join([vgname, lvname])

  # Each entry is (expected to fail, command); the order matters
  # NOTE(review): the loop header and the list brackets were not visible in
  # the reviewed source and are restored from the loop body -- confirm
  for fail, cmd in [
    (False, _CLUSTER_VERIFY),
    (False, ["gnt-cluster", "modify", "--reserved-lvs", ""]),
    (False, ["lvcreate", "-L1G", "-n", lvname, vgname]),
    (True, _CLUSTER_VERIFY),
    (False, ["gnt-cluster", "modify", "--reserved-lvs",
             "%s,.*/other-test" % lvfullname]),
    (False, _CLUSTER_VERIFY),
    (False, ["gnt-cluster", "modify", "--reserved-lvs",
             ".*/%s.*" % _QA_LV_PREFIX]),
    (False, _CLUSTER_VERIFY),
    (False, ["gnt-cluster", "modify", "--reserved-lvs", ""]),
    (True, _CLUSTER_VERIFY),
    (False, ["lvremove", "-f", lvfullname]),
    (False, _CLUSTER_VERIFY),
    ]:
    AssertCommand(cmd, fail=fail)
def TestClusterModifyEmpty():
  """gnt-cluster modify

  Without any option the command is expected to fail.

  """
  bare_modify = ["gnt-cluster", "modify"]
  AssertCommand(bare_modify, fail=True)
def TestClusterModifyDisk():
  """gnt-cluster modify -D

  Every entry of C{_FAIL_PARAMS} is expected to be rejected.

  """
  for bad_param in _FAIL_PARAMS:
    modify_cmd = ["gnt-cluster", "modify", "-D", bad_param]
    AssertCommand(modify_cmd, fail=True)
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Sets valid and invalid backend parameters and checks the effective values
  through C{gnt-cluster info}; afterwards the backend parameters from the QA
  configuration, if any, are applied again.

  """
  # Each entry is (expected to fail, command); the order matters
  # NOTE(review): the loop header, the list brackets and the guard on "bep"
  # were not visible in the reviewed source and are restored from context --
  # confirm
  for fail, cmd in [
    # max/min mem
    (False, ["gnt-cluster", "modify", "-B", "maxmem=256"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 256$'"]),
    (False, ["gnt-cluster", "modify", "-B", "minmem=256"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 256$'"]),
    (True, ["gnt-cluster", "modify", "-B", "maxmem=a"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 256$'"]),
    (True, ["gnt-cluster", "modify", "-B", "minmem=a"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 256$'"]),
    (False, ["gnt-cluster", "modify", "-B", "maxmem=128,minmem=128"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 128$'"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 128$'"]),
    # vcpus
    (False, ["gnt-cluster", "modify", "-B", "vcpus=4"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *vcpus: 4$'"]),
    (True, ["gnt-cluster", "modify", "-B", "vcpus=a"]),
    (False, ["gnt-cluster", "modify", "-B", "vcpus=1"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *vcpus: 1$'"]),
    # auto_balance
    (False, ["gnt-cluster", "modify", "-B", "auto_balance=False"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *auto_balance: False$'"]),
    (True, ["gnt-cluster", "modify", "-B", "auto_balance=1"]),
    (False, ["gnt-cluster", "modify", "-B", "auto_balance=True"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *auto_balance: True$'"]),
    ]:
    AssertCommand(cmd, fail=fail)

  # redo the original-requested BE parameters, if any
  bep = qa_config.get("backend-parameters", "")
  if bep:
    AssertCommand(["gnt-cluster", "modify", "-B", bep])
def TestClusterInfo():
  """gnt-cluster info

  The command is expected to succeed; its output is not inspected.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  The command is expected to succeed.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
def TestClusterGetmaster():
  """gnt-cluster getmaster

  The command is expected to succeed; its output is not inspected.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
def TestClusterVersion():
  """gnt-cluster version

  The command is expected to succeed; its output is not inspected.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then installs a custom RAPI certificate and a custom cluster
  domain secret, renews everything and finally restores the RAPI certificate
  that was backed up at the beginning.

  """
  # NOTE(review): the "conflicting" list opener and the try/finally lines
  # were not visible in the reviewed source and are restored from context --
  # confirm against the original file
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         pathutils.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()

    # Ensure certificate doesn't cause "gnt-cluster verify" to complain
    validity = constants.SSL_CERT_EXPIRATION_WARN * 3

    utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

    tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--rapi-certificate=%s" % tmpcert])
    finally:
      AssertCommand(["rm", "-f", tmpcert])

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    cds_fh.write(utils.GenerateSecret())
    # NOTE(review): two lines following the write were not visible; a
    # trailing newline is an assumption, the flush is required because the
    # file is uploaded by name -- confirm
    cds_fh.write("\n")
    cds_fh.flush()

    tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--cluster-domain-secret=%s" % tmpcds])
    finally:
      AssertCommand(["rm", "-f", tmpcds])

    # Renew everything at once
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
def TestClusterBurnin():
  """Burnin

  Acquires the configured number of instances (at least one is required),
  uploads the burnin tool to the master node and runs it with the options
  taken from the QA configuration. Instances are released afterwards.

  @raise qa_error.Error: If no instance could be acquired

  """
  # NOTE(review): the docstring line, "instances = []", the
  # try/except/finally lines, the command list opener, the option guards and
  # the AssertCommand(cmd) call were not visible in the reviewed source and
  # are restored from context -- confirm against the original file
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  instances = []
  try:
    try:
      num = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(0, num):
        instances.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if not instances:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      cmd = [script,
             "--os=%s" % qa_config.get("os"),
             "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
             "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
             "--disk-size=%s" % ",".join(qa_config.get("disk")),
             "--disk-growth=%s" % ",".join(qa_config.get("disk-growth")),
             "--disk-template=%s" % disk_template]
      if parallel:
        cmd.append("--parallel")
        cmd.append("--early-release")
      if check_inst:
        cmd.append("--http-check")
      if do_rename:
        cmd.append("--rename=%s" % do_rename)
      if not do_reboot:
        cmd.append("--no-reboot")
      else:
        cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      cmd += [inst["name"] for inst in instances]
      AssertCommand(cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for inst in instances:
      qa_config.ReleaseInstance(inst)
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Fails over to another node and back to the original master. The node
  acquired for the test is released in any case.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  cmd = ["gnt-cluster", "master-failover"]
  # NOTE(review): try/finally was not visible in the reviewed source and is
  # restored so that the acquired node is always released -- confirm
  try:
    AssertCommand(cmd, node=failovermaster)
    # Back to original master node
    AssertCommand(cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on the failover candidate, performs the
  failover and checks that the drain file is gone afterwards on both nodes.

  """
  drain_check = ["test", "-f", pathutils.JOB_QUEUE_DRAIN_FILE]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  # Ensure queue is not drained
  for node in [master, failovermaster]:
    AssertCommand(drain_check, node=node, fail=True)

  # Drain queue on failover master
  AssertCommand(["touch", pathutils.JOB_QUEUE_DRAIN_FILE], node=failovermaster)

  cmd = ["gnt-cluster", "master-failover"]
  # NOTE(review): try/finally was not visible in the reviewed source and is
  # restored so that the acquired node is always released -- confirm
  try:
    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)

  AssertCommand(drain_check, fail=True)
  AssertCommand(drain_check, node=failovermaster, fail=True)
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file holding a fresh UUID to the master node, distributes it
  with C{gnt-cluster copyfile} and checks its content on all nodes. The file
  is removed from all nodes afterwards.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    # NOTE(review): the lines filling the file were not visible in the
    # reviewed source; writing the UUID follows from the content check below
    # and the flush is needed because the file is uploaded by name -- confirm
    f.write(uniqueid)
    f.flush()

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
    try:
      # Copy file to all nodes
      AssertCommand(["gnt-cluster", "copyfile", testname])
      _CheckFileOnAllNodes(testname, uniqueid)
    finally:
      _RemoveFileFromAllNodes(testname)
  finally:
    # Closing also deletes the local temporary file
    f.close()
def TestClusterCommand():
  """gnt-cluster command

  Lets every node write a fresh UUID into a file through
  C{gnt-cluster command} and checks the file content on all nodes. The file
  is removed from all nodes afterwards.

  """
  uniqueid = utils.NewUUID()
  rfile = "/tmp/gnt%s" % utils.NewUUID()
  rcmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  # The redirection is part of the quoted argument, hence it is evaluated by
  # the shell on each node
  cmd = utils.ShellQuoteArgs(["gnt-cluster", "command",
                              "%s >%s" % (rcmd, rfile)])

  # NOTE(review): the try/finally lines and the call running "cmd" were not
  # visible in the reviewed source and are restored from context -- confirm
  try:
    AssertCommand(cmd)
    _CheckFileOnAllNodes(rfile, uniqueid)
  finally:
    _RemoveFileFromAllNodes(rfile)
def TestClusterDestroy():
  """gnt-cluster destroy

  The command is run with C{--yes-do-it} and is expected to succeed.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  The command is expected to succeed.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)
def TestSetExclStorCluster(newvalue):
  """Set the exclusive_storage node parameter at the cluster level.

  @param newvalue: New value of exclusive_storage
  @return: The old value of exclusive_storage
  @raise qa_error.Error: If the value read back after the modification
    differs from C{newvalue}

  """
  oldvalue = _GetBoolClusterField("exclusive_storage")
  AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                 "exclusive_storage=%s" % newvalue])
  # Read the field again to make sure the change is effective
  effvalue = _GetBoolClusterField("exclusive_storage")
  if effvalue != newvalue:
    raise qa_error.Error("exclusive_storage has the wrong value: %s instead"
                         " of %s" % (effvalue, newvalue))
  # NOTE(review): the end of this function was not visible in the reviewed
  # source; the return statement is restored from the docstring -- confirm
  return oldvalue
661 def _BuildSetESCmd(value, node_name):
662 return ["gnt-node", "modify", "--node-parameters",
663 "exclusive_storage=%s" % value, node_name]
def TestExclStorSingleNode(node):
  """cluster-verify reports exclusive_storage set only on one node.

  @param node: Node dictionary; its "primary" entry names the node on which
    exclusive_storage is enabled temporarily

  """
  node_name = node["primary"]
  es_val = _GetBoolClusterField("exclusive_storage")
  # NOTE(review): the line using es_val was not visible in the reviewed
  # source; presumably the test requires exclusive_storage to be disabled at
  # cluster level, otherwise enabling it on one node would not produce a
  # mixed group -- confirm
  assert not es_val
  AssertCommand(_BuildSetESCmd(True, node_name))
  AssertClusterVerify(fail=True, errors=[constants.CV_EGROUPMIXEDESFLAG])
  # Go back to the inherited value before the final verification
  AssertCommand(_BuildSetESCmd("default", node_name))
  AssertClusterVerify()
def TestExclStorSharedPv(node):
  """cluster-verify reports LVs that share the same PV with exclusive_storage.

  Two logical volumes are created on the node one after the other, with a
  cluster verification expected to fail after each creation; both volumes
  are removed before the final verification.

  @param node: Node dictionary; its "primary" entry names the node used

  """
  vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
  node_name = node["primary"]
  first_lv = _QA_LV_PREFIX + "vol1"
  second_lv = _QA_LV_PREFIX + "vol2"

  # One unknown volume: reported as orphan
  AssertCommand(["lvcreate", "-L1G", "-n", first_lv, vgname], node=node_name)
  orphan_only = [constants.CV_ENODEORPHANLV]
  AssertClusterVerify(fail=True, errors=orphan_only)

  # Second volume: an LVM error is expected in addition to the orphans
  AssertCommand(["lvcreate", "-L1G", "-n", second_lv, vgname], node=node_name)
  lvm_and_orphan = [constants.CV_ENODELVM, constants.CV_ENODEORPHANLV]
  AssertClusterVerify(fail=True, errors=lvm_and_orphan)

  # Clean up in creation order, then the cluster must verify again
  for lv in (first_lv, second_lv):
    AssertCommand(["lvremove", "-f", "/".join([vgname, lv])], node=node_name)
  AssertClusterVerify()