4 # Copyright (C) 2007, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Cluster related QA tests.
30 from ganeti import constants
31 from ganeti import compat
32 from ganeti import utils
33 from ganeti import pathutils
39 from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
#: Argument list for the cluster verify command; the tests in this module
#: hand it to AssertCommand unchanged
_CLUSTER_VERIFY = ["gnt-cluster", "verify"]
def _RemoveFileFromAllNodes(filename):
  """Delete a file on every node listed in the QA configuration.

  @param filename: Path of the file to remove

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
def _CheckFileOnAllNodes(filename, content):
  """Check that a file has the expected content on every node.

  The file is read with C{cat} on each configured node and the output is
  compared against C{content}.

  @param filename: Path of the file to read on each node
  @param content: Expected output of C{cat}

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    actual = qa_utils.GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(actual, content)
# "gnt-cluster info" fields: optional leading dashes/whitespace, the field
# name, a colon and then a non-empty value
_CIFIELD_RE = re.compile(r"^[-\s]*(?P<field>[^\s:]+):\s*(?P<value>\S.*)$")


def _GetBoolClusterField(field):
  """Get the Boolean value of a cluster field.

  This function currently assumes that the field name is unique in the cluster
  configuration. An assertion checks this assumption.

  @type field: string
  @param field: Name of the field
  @rtype: bool
  @return: The effective value of the field
  @raise qa_error.Error: If the field does not appear in the output of
      C{gnt-cluster info}

  """
  master = qa_config.GetMasterNode()
  infocmd = "gnt-cluster info"
  info_out = qa_utils.GetCommandOutput(master["primary"], infocmd)
  ret = None
  for line in info_out.splitlines():
    m = _CIFIELD_RE.match(line)
    # FIXME: There should be a way to specify a field through a hierarchy
    if m and m.group("field") == field:
      # Make sure that ignoring the hierarchy doesn't cause a double match
      assert ret is None, "Field %s matched more than once" % field
      # Anything other than "true" (case-insensitive) counts as False
      ret = (m.group("value").lower() == "true")
  if ret is None:
    raise qa_error.Error("Field not found in cluster configuration: %s" %
                         field)
  return ret
# Cluster-verify errors (date, "ERROR", then error code)
_CVERROR_RE = re.compile(r"^[\w\s:]+\s+- ERROR:([A-Z0-9_-]+):")


def _GetCVErrorCodes(cvout):
  """Collect the error codes reported in cluster-verify output.

  Every line matching L{_CVERROR_RE} contributes the code captured by the
  expression; all other lines are ignored.

  @type cvout: string
  @param cvout: Output of cluster-verify run with error codes enabled
  @rtype: set
  @return: The distinct error codes found, empty if there were none

  """
  ret = set()
  for line in cvout.splitlines():
    m = _CVERROR_RE.match(line)
    if m:
      ret.add(m.group(1))
  return ret
def AssertClusterVerify(fail=False, errors=None):
  """Run cluster-verify and check the result

  @type fail: bool
  @param fail: if cluster-verify is expected to fail instead of succeeding
  @type errors: list of tuples
  @param errors: List of CV_XXX errors that are expected; if specified, all the
      errors listed must appear in cluster-verify output. A non-empty value
      implies C{fail=True}.
  @raise qa_error.Error: If one of the expected errors is missing from the
      cluster-verify output

  """
  cvcmd = "gnt-cluster verify"
  mnode = qa_config.GetMasterNode()
  if errors:
    # Expected errors imply a failing run, whatever "fail" says.
    # NOTE(review): fail=True is derived from the docstring above; confirm
    # that GetCommandOutput accepts this keyword.
    cvout = GetCommandOutput(mnode["primary"], cvcmd + " --error-codes",
                             fail=True)
    actual = _GetCVErrorCodes(cvout)
    # Each expected error is a 3-tuple; its middle element is the error code
    expected = compat.UniqueFrozenset(e for (_, e, _) in errors)
    if not actual.issuperset(expected):
      missing = expected.difference(actual)
      raise qa_error.Error("Cluster-verify didn't return these expected"
                           " errors: %s" % utils.CommaJoin(missing))
  else:
    AssertCommand(cvcmd, fail=fail, node=mnode)
# data for testing failures due to bad keys/values for disk parameters; each
# entry is passed as the argument of "-D" and is expected to be rejected
_FAIL_PARAMS = [
  "nonexistent:resync-rate=1",
  "drbd:nonexistent=1",
  "drbd:resync-rate=invalid",
  ]
def TestClusterInitDisk():
  """gnt-cluster init -D"""
  cluster_name = qa_config.get("name")
  init_prefix = ["gnt-cluster", "init", "-D"]
  # Every invalid disk parameter has to make the initialization fail
  for bad_param in _FAIL_PARAMS:
    AssertCommand(init_prefix + [bad_param, cluster_name], fail=True)
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs the RAPI credentials on the master node, initializes the cluster
  and then applies the hypervisor, backend and OS parameters found in the QA
  configuration.

  @param rapi_user: RAPI user name written to the RAPI users file
  @param rapi_secret: Secret written next to C{rapi_user}

  """
  master = qa_config.GetMasterNode()

  rapi_dir = os.path.dirname(pathutils.RAPI_USERS_FILE)

  # First create the RAPI credentials
  fh = tempfile.NamedTemporaryFile()
  try:
    fh.write("%s %s write\n" % (rapi_user, rapi_secret))
    # The upload reads the file by name, so the data must be on disk first
    fh.flush()

    tmpru = qa_utils.UploadFile(master["primary"], fh.name)
    try:
      AssertCommand(["mkdir", "-p", rapi_dir])
      AssertCommand(["mv", tmpru, pathutils.RAPI_USERS_FILE])
    finally:
      # No-op after a successful "mv"; removes the upload otherwise
      AssertCommand(["rm", "-f", tmpru])
  finally:
    fh.close()

  # Initialize cluster
  cmd = [
    "gnt-cluster", "init",
    "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4),
    "--enabled-hypervisors=%s" % ",".join(qa_config.GetEnabledHypervisors()),
    ]

  # NOTE(review): the last element of this tuple ("nic-count") is
  # reconstructed; confirm against the upstream file
  for spec_type in ("mem-size", "disk-size", "disk-count", "cpu-count",
                    "nic-count"):
    for spec_val in ("min", "max", "std"):
      spec = qa_config.get("ispec_%s_%s" %
                           (spec_type.replace("-", "_"), spec_val), None)
      # Only pass the specs that are actually configured
      if spec:
        cmd.append("--specs-%s=%s=%d" % (spec_type, spec_val, spec))

  if master.get("secondary", None):
    cmd.append("--secondary-ip=%s" % master["secondary"])

  master_netdev = qa_config.get("master-netdev", None)
  if master_netdev:
    cmd.append("--master-netdev=%s" % master_netdev)

  nicparams = qa_config.get("default-nicparams", None)
  if nicparams:
    cmd.append("--nic-parameters=%s" %
               ",".join(utils.FormatKeyValue(nicparams)))

  cmd.append(qa_config.get("name"))
  AssertCommand(cmd)

  cmd = ["gnt-cluster", "modify"]

  # hypervisor parameter modifications
  hvp = qa_config.get("hypervisor-parameters", {})
  for k, v in hvp.items():
    cmd.extend(["-H", "%s:%s" % (k, v)])
  # backend parameter modifications
  bep = qa_config.get("backend-parameters", "")
  if bep:
    cmd.extend(["-B", bep])

  # Only run "modify" if at least one option was added to the bare command
  if len(cmd) > 2:
    AssertCommand(cmd)

  # OS parameters
  osp = qa_config.get("os-parameters", {})
  for k, v in osp.items():
    AssertCommand(["gnt-os", "modify", "-O", v, k])

  # OS hypervisor parameters
  os_hvp = qa_config.get("os-hvp", {})
  for os_name in os_hvp:
    for hv, hvp in os_hvp[os_name].items():
      AssertCommand(["gnt-os", "modify", "-H", "%s:%s" % (hv, hvp), os_name])
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the configured "rename" target and back to its
  original name. The test is skipped with a message if no target is
  configured.

  """
  cmd = ["gnt-cluster", "rename", "-f"]

  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  # NOTE(review): the verify step after each rename is reconstructed from the
  # gaps in the damaged listing; confirm against the upstream file
  for data in [
    cmd + [rename_target],
    _CLUSTER_VERIFY,
    cmd + [original_name],
    _CLUSTER_VERIFY,
    ]:
    AssertCommand(data)
def TestClusterOob():
  """out-of-band framework

  Checks that cluster-verify fails while the configured OOB program is
  missing or not executable, and succeeds once it exists with mode 0500.

  """
  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

  AssertCommand(_CLUSTER_VERIFY)
  # Point the cluster to a program that does not exist
  AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                 "oob_program=/tmp/ganeti-qa-oob-does-not-exist-%s" %
                 utils.NewUUID()])

  AssertCommand(_CLUSTER_VERIFY, fail=True)

  # Existing file, but without execute permission
  AssertCommand(["touch", oob_path_exists])
  AssertCommand(["chmod", "0400", oob_path_exists])
  AssertCommand(["gnt-cluster", "copyfile", oob_path_exists])

  try:
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program=%s" % oob_path_exists])

    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # Make the file executable and distribute it again
    AssertCommand(["chmod", "0500", oob_path_exists])
    AssertCommand(["gnt-cluster", "copyfile", oob_path_exists])

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    # Remove the helper file from the nodes even if a check above failed
    AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])

  # Reset the parameter to an empty value
  AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                 "oob_program="])
def TestClusterEpo():
  """gnt-cluster epo

  Exercises invalid option combinations, then powers everything off and on
  again and checks the instance status after each step.

  """
  master = qa_config.GetMasterNode()
  inst_status_cmd = "gnt-instance list --no-headers -o status"

  # Assert that OOB is unavailable for all nodes
  # NOTE(review): the queried field name (" powered") is reconstructed from
  # the loop variable below; confirm against the upstream file
  result_output = GetCommandOutput(master["primary"],
                                   "gnt-node list --verbose --no-headers -o"
                                   " powered")
  AssertEqual(compat.all(powered == "(unavail)"
                         for powered in result_output.splitlines()), True)

  # Conflicting
  AssertCommand(["gnt-cluster", "epo", "--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(["gnt-cluster", "epo", "--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(["gnt-cluster", "epo", "-f", master["primary"]], fail=True)

  # This shouldn't fail
  AssertCommand(["gnt-cluster", "epo", "-f", "--all"])

  # All instances should have been stopped now
  result_output = GetCommandOutput(master["primary"], inst_status_cmd)
  # ERROR_down because the instance is stopped but not recorded as such
  AssertEqual(compat.all(status == "ERROR_down"
                         for status in result_output.splitlines()), True)

  # Now start everything again
  AssertCommand(["gnt-cluster", "epo", "--on", "-f", "--all"])

  # All instances should have been started now
  result_output = GetCommandOutput(master["primary"], inst_status_cmd)
  AssertEqual(compat.all(status == "running"
                         for status in result_output.splitlines()), True)
def TestClusterVerify():
  """gnt-cluster verify"""
  # Plain verify first, then the disk verification
  for verify_cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(verify_cmd)
# NOTE(review): the "def" line of this function is missing from the damaged
# listing; the name TestJobqueue is reconstructed, confirm against callers
def TestJobqueue():
  """gnt-debug test-jobqueue"""
  AssertCommand(["gnt-debug", "test-jobqueue"])
# NOTE(review): the "def" line of this function is missing from the damaged
# listing; the name TestDelay is reconstructed (the "node" parameter is
# required by the body), confirm against callers
def TestDelay(node):
  """gnt-debug delay

  @param node: Node dictionary; its "primary" entry is passed to C{-n}

  """
  AssertCommand(["gnt-debug", "delay", "1"])
  AssertCommand(["gnt-debug", "delay", "--no-master", "1"])
  AssertCommand(["gnt-debug", "delay", "--no-master",
                 "-n", node["primary"], "1"])
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Runs a fixed sequence of commands; each step states whether the command is
  expected to fail.

  """
  # (expected to fail?, command)
  steps = [
    (False, _CLUSTER_VERIFY),
    (False, ["gnt-cluster", "modify", "--reserved-lvs", ""]),
    (False, ["lvcreate", "-L1G", "-nqa-test", "xenvg"]),
    (True, _CLUSTER_VERIFY),
    (False, ["gnt-cluster", "modify", "--reserved-lvs",
             "xenvg/qa-test,.*/other-test"]),
    (False, _CLUSTER_VERIFY),
    (False, ["gnt-cluster", "modify", "--reserved-lvs", ".*/qa-.*"]),
    (False, _CLUSTER_VERIFY),
    (False, ["gnt-cluster", "modify", "--reserved-lvs", ""]),
    (True, _CLUSTER_VERIFY),
    (False, ["lvremove", "-f", "xenvg/qa-test"]),
    (False, _CLUSTER_VERIFY),
    ]
  for fail, cmd in steps:
    AssertCommand(cmd, fail=fail)
def TestClusterModifyEmpty():
  """gnt-cluster modify"""
  # The command is run without any option and is expected to fail
  bare_modify = ["gnt-cluster", "modify"]
  AssertCommand(bare_modify, fail=True)
def TestClusterModifyDisk():
  """gnt-cluster modify -D"""
  modify_prefix = ["gnt-cluster", "modify", "-D"]
  # Every invalid disk parameter has to be rejected
  for bad_param in _FAIL_PARAMS:
    AssertCommand(modify_prefix + [bad_param], fail=True)
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Sets valid and invalid backend parameters, checking after each change that
  C{gnt-cluster info} shows the expected value. The backend parameters from
  the QA configuration, if any, are applied again at the end.

  """
  # (expected to fail?, command)
  steps = [
    # max/min mem
    (False, ["gnt-cluster", "modify", "-B", "maxmem=256"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 256$'"]),
    (False, ["gnt-cluster", "modify", "-B", "minmem=256"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 256$'"]),
    (True, ["gnt-cluster", "modify", "-B", "maxmem=a"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 256$'"]),
    (True, ["gnt-cluster", "modify", "-B", "minmem=a"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 256$'"]),
    (False, ["gnt-cluster", "modify", "-B", "maxmem=128,minmem=128"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 128$'"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 128$'"]),
    # vcpus
    (False, ["gnt-cluster", "modify", "-B", "vcpus=4"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *vcpus: 4$'"]),
    (True, ["gnt-cluster", "modify", "-B", "vcpus=a"]),
    (False, ["gnt-cluster", "modify", "-B", "vcpus=1"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *vcpus: 1$'"]),
    # auto_balance
    (False, ["gnt-cluster", "modify", "-B", "auto_balance=False"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *auto_balance: False$'"]),
    (True, ["gnt-cluster", "modify", "-B", "auto_balance=1"]),
    (False, ["gnt-cluster", "modify", "-B", "auto_balance=True"]),
    (False, ["sh", "-c", "gnt-cluster info|grep '^ *auto_balance: True$'"]),
    ]
  for fail, cmd in steps:
    AssertCommand(cmd, fail=fail)

  # redo the original-requested BE parameters, if any
  bep = qa_config.get("backend-parameters", "")
  if bep:
    AssertCommand(["gnt-cluster", "modify", "-B", bep])
def TestClusterInfo():
  """gnt-cluster info"""
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
def TestClusterRedistConf():
  """gnt-cluster redist-conf"""
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
def TestClusterGetmaster():
  """gnt-cluster getmaster"""
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
def TestClusterVersion():
  """gnt-cluster version"""
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks conflicting and invalid options, installs a custom RAPI certificate
  and a custom cluster domain secret, renews everything and finally restores
  the RAPI certificate that was in place before the test.

  """
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         pathutils.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
    finally:
      # The local copy is not needed once it has been uploaded
      fh.close()
    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--rapi-certificate=%s" % tmpcert])
    finally:
      AssertCommand(["rm", "-f", tmpcert])

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      # NOTE(review): the trailing newline is reconstructed from a gap in the
      # damaged listing; confirm against the upstream file
      cds_fh.write("\n")
      # The upload reads the file by name, so the data must be on disk first
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
    finally:
      cds_fh.close()
    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--cluster-domain-secret=%s" % tmpcds])
    finally:
      AssertCommand(["rm", "-f", tmpcds])

    # Normal case
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
def TestClusterBurnin():
  """Burnin

  Acquires up to "burnin-instances" instances, uploads the burnin tool to the
  master node and runs it with the options taken from the QA configuration.
  The uploaded script is removed and the instances are released afterwards,
  even on failure.

  @raise qa_error.Error: If not even one instance could be acquired

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  instances = []
  try:
    try:
      num = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(0, num):
        instances.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if len(instances) < 1:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      cmd = [script,
             "--os=%s" % qa_config.get("os"),
             "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
             "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
             "--disk-size=%s" % ",".join(qa_config.get("disk")),
             "--disk-growth=%s" % ",".join(qa_config.get("disk-growth")),
             "--disk-template=%s" % disk_template]
      if parallel:
        cmd.append("--parallel")
        cmd.append("--early-release")
      if check_inst:
        cmd.append("--http-check")
      if do_rename:
        cmd.append("--rename=%s" % do_rename)
      if not do_reboot:
        cmd.append("--no-reboot")
      else:
        cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      cmd += [inst["name"] for inst in instances]
      AssertCommand(cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    # Hand back whatever was acquired, also when acquisition stopped early
    for inst in instances:
      qa_config.ReleaseInstance(inst)
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Fails the master role over to another node and back again. The node
  acquired for the test is released even if one of the commands fails.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  cmd = ["gnt-cluster", "master-failover"]
  try:
    AssertCommand(cmd, node=failovermaster)
    # Back to original master node
    AssertCommand(cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on the failover candidate, fails over to
  it and checks that the drain file is gone on both nodes afterwards. The
  node acquired for the test is released even if one of the commands fails.

  """
  # Succeeds only if the drain file exists
  drain_check = ["test", "-f", pathutils.JOB_QUEUE_DRAIN_FILE]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  # Ensure queue is not drained
  for node in [master, failovermaster]:
    AssertCommand(drain_check, node=node, fail=True)

  # Drain queue on failover master
  AssertCommand(["touch", pathutils.JOB_QUEUE_DRAIN_FILE], node=failovermaster)

  cmd = ["gnt-cluster", "master-failover"]
  try:
    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)

  AssertCommand(drain_check, fail=True)
  AssertCommand(drain_check, node=failovermaster, fail=True)
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file with unique content to the master node, distributes it with
  C{gnt-cluster copyfile} and checks its content on all nodes. The file is
  removed from all nodes afterwards, even on failure.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    # The upload reads the file by name, so the data must be on disk first
    f.flush()

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
  finally:
    f.close()

  try:
    # Copy file to all nodes
    AssertCommand(["gnt-cluster", "copyfile", testname])
    _CheckFileOnAllNodes(testname, uniqueid)
  finally:
    _RemoveFileFromAllNodes(testname)
def TestClusterCommand():
  """gnt-cluster command

  Uses C{gnt-cluster command} to write a unique string into a file under
  C{/tmp} and checks the file's content on all nodes. The file is removed
  from all nodes afterwards, even on failure.

  """
  uniqueid = utils.NewUUID()
  rfile = "/tmp/gnt%s" % utils.NewUUID()
  # Quoted twice: once for the command run on the nodes, once for the
  # gnt-cluster invocation itself
  rcmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  cmd = utils.ShellQuoteArgs(["gnt-cluster", "command",
                              "%s >%s" % (rcmd, rfile)])

  try:
    AssertCommand(cmd)
    _CheckFileOnAllNodes(rfile, uniqueid)
  finally:
    _RemoveFileFromAllNodes(rfile)
def TestClusterDestroy():
  """gnt-cluster destroy"""
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes"""
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)
def TestSetExclStorCluster(newvalue):
  """Set the exclusive_storage node parameter at the cluster level.

  @type newvalue: bool
  @param newvalue: New value of exclusive_storage
  @rtype: bool
  @return: The old value of exclusive_storage
  @raise qa_error.Error: If the value read back after the change differs
      from C{newvalue}

  """
  oldvalue = _GetBoolClusterField("exclusive_storage")
  AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                 "exclusive_storage=%s" % newvalue])
  # Read the field back to make sure the change took effect
  effvalue = _GetBoolClusterField("exclusive_storage")
  if effvalue != newvalue:
    raise qa_error.Error("exclusive_storage has the wrong value: %s instead"
                         " of %s" % (effvalue, newvalue))
  return oldvalue
def _BuildSetESCmd(value, node_name):
  """Build the C{gnt-node modify} command setting exclusive_storage.

  @param value: New value for the exclusive_storage node parameter
  @param node_name: Name of the node to modify
  @rtype: list
  @return: Command line as a list of arguments

  """
  es_param = "exclusive_storage=%s" % value
  cmd = ["gnt-node", "modify", "--node-parameters"]
  cmd.append(es_param)
  cmd.append(node_name)
  return cmd
def TestExclStorSingleNode(node):
  """cluster-verify reports exclusive_storage set only on one node.

  @param node: Node dictionary; its "primary" entry names the node on which
      exclusive_storage is enabled

  """
  node_name = node["primary"]
  es_val = _GetBoolClusterField("exclusive_storage")
  # NOTE(review): this precondition check is reconstructed from a gap in the
  # damaged listing; confirm against the upstream file. The mixed-flag error
  # can presumably only be provoked while the cluster-level value is off.
  assert not es_val, "exclusive_storage must be disabled at cluster level"
  AssertCommand(_BuildSetESCmd(True, node_name))
  AssertClusterVerify(fail=True, errors=[constants.CV_EGROUPMIXEDESFLAG])
  # Put the node back to the cluster default and make sure verify is clean
  AssertCommand(_BuildSetESCmd("default", node_name))
  AssertClusterVerify()