Add QA test for “gnt-debug delay”
[ganeti-local] / qa / qa_cluster.py
1 #
2 #
3
4 # Copyright (C) 2007, 2010, 2011 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Cluster related QA tests.
23
24 """
25
26 import tempfile
27 import os.path
28
29 from ganeti import constants
30 from ganeti import compat
31 from ganeti import utils
32
33 import qa_config
34 import qa_utils
35 import qa_error
36
37 from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
38
39
#: Argument list for "gnt-cluster verify"; the tests in this module pass it
#: to AssertCommand as-is, so it must not be modified in place
_CLUSTER_VERIFY = ["gnt-cluster", "verify"]
42
43
def _RemoveFileFromAllNodes(filename):
  """Deletes a file on every node listed in the QA configuration.

  @param filename: path handed to "rm -f" on each node

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
50
51
def _CheckFileOnAllNodes(filename, content):
  """Asserts that a file has the expected content on every node.

  The file is read with "cat" on each node's primary name and the command
  output is compared with C{content} for equality.

  @param filename: path of the file to read on each node
  @param content: expected output of "cat"

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    output = GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(output, content)
59
60
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs the RAPI users file on the master node, runs "gnt-cluster init"
  with the options derived from the QA configuration and afterwards applies
  the configured hypervisor, backend and OS parameters.

  @param rapi_user: user name written to the RAPI users file
  @param rapi_secret: secret written next to the user name

  """
  master = qa_config.GetMasterNode()

  rapi_dir = os.path.dirname(constants.RAPI_USERS_FILE)

  # The RAPI credentials are put in place before the cluster is initialized
  fh = tempfile.NamedTemporaryFile()
  try:
    fh.write("%s %s write\n" % (rapi_user, rapi_secret))
    fh.flush()

    tmpru = qa_utils.UploadFile(master["primary"], fh.name)
    try:
      AssertCommand(["mkdir", "-p", rapi_dir])
      AssertCommand(["mv", tmpru, constants.RAPI_USERS_FILE])
    finally:
      # No-op after a successful "mv", cleanup otherwise
      AssertCommand(["rm", "-f", tmpru])
  finally:
    fh.close()

  # Assemble the "init" command line
  init_cmd = [
    "gnt-cluster", "init",
    "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4),
    ]

  secondary_ip = master.get("secondary", None)
  if secondary_ip:
    init_cmd.append("--secondary-ip=%s" % secondary_ip)

  bridge = qa_config.get("bridge", None)
  if bridge:
    # The bridge doubles as the master network device
    init_cmd.extend(["--bridge=%s" % bridge, "--master-netdev=%s" % bridge])

  htype = qa_config.get("enabled-hypervisors", None)
  if htype:
    init_cmd.append("--enabled-hypervisors=%s" % htype)

  init_cmd.append(qa_config.get("name"))

  AssertCommand(init_cmd)

  # Cluster-level hypervisor and backend parameters; "modify" is only run
  # if there is at least one option to pass
  modify_opts = []
  for hv_name, hv_params in qa_config.get("hypervisor-parameters",
                                          {}).items():
    modify_opts.extend(["-H", "%s:%s" % (hv_name, hv_params)])

  bep = qa_config.get("backend-parameters", "")
  if bep:
    modify_opts.extend(["-B", bep])

  if modify_opts:
    AssertCommand(["gnt-cluster", "modify"] + modify_opts)

  # Per-OS parameters
  for os_name, os_params in qa_config.get("os-parameters", {}).items():
    AssertCommand(["gnt-os", "modify", "-O", os_params, os_name])

  # Per-OS hypervisor parameters
  for os_name, hv_map in qa_config.get("os-hvp", {}).items():
    for hv_name, hv_params in hv_map.items():
      AssertCommand(["gnt-os", "modify", "-H",
                     "%s:%s" % (hv_name, hv_params), os_name])
127
128
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the configured "rename" target and back to its
  original name, running cluster verification after each rename. If no
  "rename" entry is configured, an error message is printed and nothing
  else is done.

  """
  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  rename_cmd = ["gnt-cluster", "rename", "-f"]

  # There ...
  AssertCommand(rename_cmd + [rename_target])
  AssertCommand(_CLUSTER_VERIFY)

  # ... and back again
  AssertCommand(rename_cmd + [original_name])
  AssertCommand(_CLUSTER_VERIFY)
146
147
def TestClusterOob():
  """out-of-band framework

  Points the "oob_program" node parameter at a missing file, then at an
  existing file with mode 0400 and finally at the same file with mode 0500.
  Cluster verification is run with fail=True for the first two cases and
  without it for the last one. The parameter is reset to an empty value at
  the end.

  """
  def _SetOobProgram(path):
    """Sets the cluster-wide "oob_program" node parameter."""
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program=%s" % path])

  def _Distribute(path):
    """Copies the file to the nodes using "gnt-cluster copyfile"."""
    AssertCommand(["gnt-cluster", "copyfile", path])

  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

  AssertCommand(_CLUSTER_VERIFY)

  # Program path which doesn't exist
  _SetOobProgram("/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID())
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  # Existing file, created with mode 0400
  AssertCommand(["touch", oob_path_exists])
  AssertCommand(["chmod", "0400", oob_path_exists])
  _Distribute(oob_path_exists)

  try:
    _SetOobProgram(oob_path_exists)
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # Same file with mode 0500
    AssertCommand(["chmod", "0500", oob_path_exists])
    _Distribute(oob_path_exists)

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])

  # Unset the parameter again
  _SetOobProgram("")
178
179
def TestClusterEpo():
  """gnt-cluster epo

  Checks that every node reports "(unavail)" as its power state, tries a
  number of invalid "epo" invocations, then runs "epo" for the whole
  cluster and "epo --on" afterwards, checking the instance status list
  after each of the two.

  """
  master = qa_config.GetMasterNode()
  master_name = master["primary"]

  def _AssertAllLines(list_cmd, expected):
    """Asserts that every output line of a command equals a value."""
    lines = GetCommandOutput(master_name, list_cmd).splitlines()
    AssertEqual(compat.all(line == expected for line in lines), True)

  epo = ["gnt-cluster", "epo"]
  node_power_cmd = "gnt-node list --verbose --no-header -o powered"
  inst_status_cmd = "gnt-instance list --no-header -o status"

  # OOB has to be unavailable for all nodes
  _AssertAllLines(node_power_cmd, "(unavail)")

  # "--groups" and "--all" conflict
  AssertCommand(epo + ["--groups", "--all"], fail=True)
  # "--all" doesn't take arguments
  AssertCommand(epo + ["--all", "some_arg"], fail=True)

  # The master may only be part of the list if "--all" is given
  AssertCommand(epo + ["-f", master_name], fail=True)

  # Expected to succeed
  AssertCommand(epo + ["-f", "--all"])

  # By now all instances should be stopped
  _AssertAllLines(inst_status_cmd, "ADMIN_down")

  # Power everything on again
  AssertCommand(epo + ["--on", "-f", "--all"])

  # By now all instances should be running
  _AssertAllLines(inst_status_cmd, "running")
216
217
def TestClusterVerify():
  """gnt-cluster verify

  Runs "gnt-cluster verify" followed by "gnt-cluster verify-disks".

  """
  for verify_cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(verify_cmd)
222
223
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command through AssertCommand; its output isn't inspected here.

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
227
228
def TestDelay(node):
  """gnt-debug delay

  Runs a delay of "1" three times: with no options, with "--no-master" and
  with "--no-master" plus "-n" naming the given node.

  @param node: node entry whose "primary" name is passed to "-n"

  """
  delay_cmd = ["gnt-debug", "delay"]
  duration = ["1"]

  AssertCommand(delay_cmd + duration)

  no_master_cmd = delay_cmd + ["--no-master"]
  AssertCommand(no_master_cmd + duration)
  AssertCommand(no_master_cmd + ["-n", node["primary"]] + duration)
235
236
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates the logical volume "xenvg/qa-test" and runs cluster verification
  with different "--reserved-lvs" settings: verification is run with
  fail=True while the reserved list is empty and the volume exists, and
  with fail=False in all other steps.

  """
  def _Verify(fail):
    """Runs cluster verification with the given fail flag."""
    AssertCommand(_CLUSTER_VERIFY, fail=fail)

  def _SetReservedLvs(patterns):
    """Sets the cluster's reserved LV list."""
    AssertCommand(["gnt-cluster", "modify", "--reserved-lvs", patterns],
                  fail=False)

  # Baseline with an empty reserved list
  _Verify(False)
  _SetReservedLvs("")

  # Volume exists and isn't reserved
  AssertCommand(["lvcreate", "-L1G", "-nqa-test", "xenvg"], fail=False)
  _Verify(True)

  # Reserved by its full name
  _SetReservedLvs("xenvg/qa-test,.*/other-test")
  _Verify(False)

  # Reserved through a pattern
  _SetReservedLvs(".*/qa-.*")
  _Verify(False)

  # No longer reserved
  _SetReservedLvs("")
  _Verify(True)

  # Volume removed again
  AssertCommand(["lvremove", "-f", "xenvg/qa-test"], fail=False)
  _Verify(False)
255
256
def TestClusterModifyBe():
  """gnt-cluster modify -B

  For each of "memory", "vcpus" and "auto_balance" a value is set and
  looked up in the output of "gnt-cluster info", an invalid value is tried
  with fail=True, and a final value is set and looked up. Afterwards the
  backend parameters from the QA configuration, if any, are applied again.

  """
  def _Modify(param, value, fail):
    """Runs "gnt-cluster modify -B" for a single backend parameter."""
    AssertCommand(["gnt-cluster", "modify", "-B", "%s=%s" % (param, value)],
                  fail=fail)

  def _CheckInfo(param, value):
    """Greps the output of "gnt-cluster info" for a parameter value."""
    AssertCommand(["sh", "-c",
                   "gnt-cluster info|grep '^ *%s: %s$'" % (param, value)],
                  fail=False)

  # (parameter, first value, invalid value, final value)
  for (param, first, invalid, final) in [
    ("memory", "256", "a", "128"),
    ("vcpus", "4", "a", "1"),
    ("auto_balance", "False", "1", "True"),
    ]:
    _Modify(param, first, False)
    _CheckInfo(param, first)
    _Modify(param, invalid, True)
    _Modify(param, final, False)
    _CheckInfo(param, final)

  # Re-apply the backend parameters requested in the configuration, if any
  bep = qa_config.get("backend-parameters", "")
  if bep:
    AssertCommand(["gnt-cluster", "modify", "-B", bep])
285
286
def TestClusterInfo():
  """gnt-cluster info

  Runs the command through AssertCommand; its output isn't inspected here.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
290
291
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command through AssertCommand.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
295
296
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command through AssertCommand; its output isn't inspected here.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
300
301
def TestClusterVersion():
  """gnt-cluster version

  Runs the command through AssertCommand; its output isn't inspected here.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
305
306
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Tries conflicting options and an invalid RAPI certificate first (all with
  fail=True). Then, with the current RAPI certificate backed up, it
  installs a custom RAPI certificate and a custom cluster domain secret,
  renews all credentials and restores the RAPI certificate from the
  backup.

  Local temporary files are closed explicitly once the commands using
  their uploaded copies have run, instead of being left open until they
  are garbage-collected.

  """
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         constants.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
      try:
        AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                       "--rapi-certificate=%s" % tmpcert])
      finally:
        AssertCommand(["rm", "-f", tmpcert])
    finally:
      # NOTE(review): close() also unlinks fh.name; this assumes the
      # certificate generator leaves a file at that path -- confirm
      fh.close()

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
      try:
        AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                       "--cluster-domain-secret=%s" % tmpcds])
      finally:
        AssertCommand(["rm", "-f", tmpcds])
    finally:
      cds_fh.close()

    # Normal case
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
367
368
def TestClusterBurnin():
  """Burnin

  Acquires up to "burnin-instances" instances (at least one is required),
  uploads "../tools/burnin" to the master node and runs it with options
  derived from the QA configuration. The uploaded script is removed and the
  instances are released afterwards, whether or not burnin succeeded.

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Collect as many instances as requested, or as many as are available
  instances = []
  try:
    try:
      wanted = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(wanted):
        instances.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if not instances:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Assemble the burnin command line
      burnin_cmd = [script]
      burnin_cmd.append("--os=%s" % qa_config.get("os"))
      burnin_cmd.append("--disk-size=%s" % ",".join(qa_config.get("disk")))
      burnin_cmd.append("--disk-growth=%s" %
                        ",".join(qa_config.get("disk-growth")))
      burnin_cmd.append("--disk-template=%s" % disk_template)

      if parallel:
        burnin_cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        burnin_cmd.append("--http-check")
      if do_rename:
        burnin_cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        burnin_cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        burnin_cmd.append("--no-reboot")

      burnin_cmd.extend([entry["name"] for entry in instances])

      AssertCommand(burnin_cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for acquired in instances:
      qa_config.ReleaseInstance(acquired)
421
422
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs the failover command on a second node and then on the original
  master node again. The second node is released afterwards in any case.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  failover_cmd = ["gnt-cluster", "master-failover"]
  try:
    # First to the other node, then back to the original master
    for target in (failovermaster, master):
      AssertCommand(failover_cmd, node=target)
  finally:
    qa_config.ReleaseNode(failovermaster)
434     qa_config.ReleaseNode(failovermaster)
435
436
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on a second node, fails over to that
  node and asserts (via "test -f" with fail=True) that the drain file is
  gone afterwards, both before and after failing back to the original
  master node.

  """
  drain_file = constants.JOB_QUEUE_DRAIN_FILE
  has_drain_file = ["test", "-f", drain_file]
  failover_cmd = ["gnt-cluster", "master-failover"]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  # The queue must not be drained on either node to begin with
  AssertCommand(has_drain_file, node=master, fail=True)
  AssertCommand(has_drain_file, node=failovermaster, fail=True)

  # Drain the queue on the node which is about to become master
  AssertCommand(["touch", drain_file], node=failovermaster)

  try:
    AssertCommand(has_drain_file, node=failovermaster)
    AssertCommand(failover_cmd, node=failovermaster)
    AssertCommand(has_drain_file, fail=True)
    AssertCommand(has_drain_file, node=failovermaster, fail=True)

    # Fail back to the original master node
    AssertCommand(failover_cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)

  AssertCommand(has_drain_file, fail=True)
  AssertCommand(has_drain_file, node=failovermaster, fail=True)
465
466
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Writes a fresh UUID to a local temporary file, uploads it to the master
  node, distributes the uploaded file with "gnt-cluster copyfile" and
  checks its content on all nodes. The remote file is removed from all
  nodes and the local temporary file is closed afterwards, whether or not
  the checks succeeded.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    f.flush()
    f.seek(0)

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
    try:
      # Copy file to all nodes
      AssertCommand(["gnt-cluster", "copyfile", testname])
      _CheckFileOnAllNodes(testname, uniqueid)
    finally:
      _RemoveFileFromAllNodes(testname)
  finally:
    # Close (and thereby delete) the local file explicitly instead of
    # leaving it open until the object is garbage-collected
    f.close()
487
488
def TestClusterCommand():
  """gnt-cluster command

  Uses "gnt-cluster command" to run an "echo" which writes a fresh UUID to
  a file below /tmp, then checks the content of that file on all nodes. The
  file is removed from all nodes afterwards.

  """
  uniqueid = utils.NewUUID()
  rfile = "/tmp/gnt%s" % utils.NewUUID()

  # Shell snippet handed to "gnt-cluster command"; the redirection is part
  # of the snippet
  echo_cmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  remote_snippet = "%s >%s" % (echo_cmd, rfile)
  cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_snippet])

  try:
    AssertCommand(cmd)
    _CheckFileOnAllNodes(rfile, uniqueid)
  finally:
    _RemoveFileFromAllNodes(rfile)
502
503
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with "--yes-do-it" through AssertCommand.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
507
508
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command through AssertCommand.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)