QA: Do not pass "--bridge" to "gnt-cluster init"
[ganeti-local] / qa / qa_cluster.py
1 #
2 #
3
4 # Copyright (C) 2007, 2010, 2011, 2012 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Cluster related QA tests.
23
24 """
25
26 import tempfile
27 import os.path
28
29 from ganeti import constants
30 from ganeti import compat
31 from ganeti import utils
32 from ganeti import pathutils
33
34 import qa_config
35 import qa_utils
36 import qa_error
37
38 from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
39
40
#: cluster verify command
#: Argument list for "gnt-cluster verify". The same list object is handed to
#: AssertCommand by several tests in this module, so treat it as read-only.
_CLUSTER_VERIFY = ["gnt-cluster", "verify"]
43
44
def _RemoveFileFromAllNodes(filename):
  """Removes a file from all nodes.

  Runs "rm -f" for the given path on every entry of the "nodes" list in the
  QA configuration.

  @param filename: path of the file to delete on each node

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
51
52
def _CheckFileOnAllNodes(filename, content):
  """Verifies the content of the given file on all nodes.

  The file is read with "cat" on the primary address of every configured
  node and the output is compared against the expected content.

  @param filename: path of the file to read on each node
  @param content: exact text the file is expected to contain

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    actual = qa_utils.GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(actual, content)
60
61
# data for testing failures due to bad keys/values for disk parameters
# Each entry is passed as the argument of "-D" to "gnt-cluster init" and
# "gnt-cluster modify" by the tests in this module, and the command is
# expected to fail.
# NOTE(review): the entries look like "<disk template>:<parameter>=<value>",
# covering an unknown template, an unknown parameter and an invalid value --
# confirm against the gnt-cluster documentation.
_FAIL_PARAMS = ["nonexistent:resync-rate=1",
                "drbd:nonexistent=1",
                "drbd:resync-rate=invalid",
                ]
67
68
def TestClusterInitDisk():
  """gnt-cluster init -D

  Checks that cluster initialization is refused for every invalid disk
  parameter specification listed in L{_FAIL_PARAMS}.

  """
  cluster_name = qa_config.get("name")
  init_with_diskparams = ["gnt-cluster", "init", "-D"]
  for bad_param in _FAIL_PARAMS:
    AssertCommand(init_with_diskparams + [bad_param, cluster_name], fail=True)
74
75
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs a RAPI users file containing the given credentials, initializes
  the cluster with the options taken from the QA configuration, and then
  applies the configured hypervisor, backend and OS parameters.

  @param rapi_user: user name written to the RAPI users file
  @param rapi_secret: password written to the RAPI users file

  """
  master = qa_config.GetMasterNode()
  rapi_users_path = pathutils.RAPI_USERS_FILE

  # RAPI credentials are created first: written to a local temporary file,
  # uploaded, and moved into place
  credentials = tempfile.NamedTemporaryFile()
  try:
    credentials.write("%s %s write\n" % (rapi_user, rapi_secret))
    credentials.flush()

    uploaded = qa_utils.UploadFile(master["primary"], credentials.name)
    try:
      AssertCommand(["mkdir", "-p", os.path.dirname(rapi_users_path)])
      AssertCommand(["mv", uploaded, rapi_users_path])
    finally:
      # No-op after a successful "mv"; cleans up if one of the steps failed
      AssertCommand(["rm", "-f", uploaded])
  finally:
    credentials.close()

  # Assemble the "gnt-cluster init" command line
  ip_version = qa_config.get("primary_ip_version", 4)
  hypervisors = ",".join(qa_config.GetEnabledHypervisors())
  init_cmd = [
    "gnt-cluster", "init",
    "--primary-ip-version=%d" % ip_version,
    "--enabled-hypervisors=%s" % hypervisors,
    ]

  # Instance policy specs are looked up as "ispec_<type>_<min|max|std>" and
  # only passed on when configured with a true value
  for spec_type in ("mem-size", "disk-size", "disk-count", "cpu-count",
                    "nic-count"):
    config_stem = "ispec_%s" % spec_type.replace("-", "_")
    for bound in ("min", "max", "std"):
      value = qa_config.get("%s_%s" % (config_stem, bound), None)
      if value:
        init_cmd.append("--specs-%s=%s=%d" % (spec_type, bound, value))

  secondary_ip = master.get("secondary", None)
  if secondary_ip:
    init_cmd.append("--secondary-ip=%s" % secondary_ip)

  # The configured bridge is passed as the master network device
  master_netdev = qa_config.get("bridge", None)
  if master_netdev:
    init_cmd.append("--master-netdev=%s" % master_netdev)

  init_cmd.append(qa_config.get("name"))
  AssertCommand(init_cmd)

  # Cluster-level hypervisor and backend parameters go into one "modify" call
  modify_cmd = ["gnt-cluster", "modify"]
  hv_config = qa_config.get("hypervisor-parameters", {})
  for hv_name, hv_params in hv_config.items():
    modify_cmd.extend(["-H", "%s:%s" % (hv_name, hv_params)])

  be_params = qa_config.get("backend-parameters", "")
  if be_params:
    modify_cmd.extend(["-B", be_params])

  # Only run the command if at least one option was added
  if len(modify_cmd) > 2:
    AssertCommand(modify_cmd)

  # Per-OS parameters
  os_params = qa_config.get("os-parameters", {})
  for os_name, params in os_params.items():
    AssertCommand(["gnt-os", "modify", "-O", params, os_name])

  # Per-OS hypervisor parameters
  os_hv_config = qa_config.get("os-hvp", {})
  for os_name in os_hv_config:
    for hv_name, hv_params in os_hv_config[os_name].items():
      AssertCommand(["gnt-os", "modify", "-H",
                     "%s:%s" % (hv_name, hv_params), os_name])
146
147
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the configured "rename" target and back to its
  original name, running a cluster verification after each rename. If no
  "rename" entry is configured an error message is printed and nothing is
  run.

  """
  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  rename = ["gnt-cluster", "rename", "-f"]

  AssertCommand(rename + [rename_target])
  AssertCommand(_CLUSTER_VERIFY)
  AssertCommand(rename + [original_name])
  AssertCommand(_CLUSTER_VERIFY)
165
166
def TestClusterOob():
  """out-of-band framework

  Points the cluster-wide "oob_program" node parameter first at a path that
  does not exist, then at an existing file with mode 0400, and finally at the
  same file with mode 0500. Cluster verification is expected to fail in the
  first two cases and to succeed in the last one. The parameter is cleared
  again at the end.

  """
  set_node_params = ["gnt-cluster", "modify", "--node-parameters"]
  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()
  distribute = ["gnt-cluster", "copyfile", oob_path_exists]

  AssertCommand(_CLUSTER_VERIFY)

  # Case 1: the configured program does not exist at all
  oob_path_missing = "/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID()
  AssertCommand(set_node_params + ["oob_program=%s" % oob_path_missing])
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  # Case 2: the program exists on all nodes but has mode 0400
  AssertCommand(["touch", oob_path_exists])
  AssertCommand(["chmod", "0400", oob_path_exists])
  AssertCommand(distribute)

  try:
    AssertCommand(set_node_params + ["oob_program=%s" % oob_path_exists])
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # Case 3: same file with mode 0500, verification must pass now
    AssertCommand(["chmod", "0500", oob_path_exists])
    AssertCommand(distribute)
    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])

  # Clear the parameter again
  AssertCommand(set_node_params + ["oob_program="])
197
198
def TestClusterEpo():
  """gnt-cluster epo

  Checks the argument validation of "gnt-cluster epo", then powers the whole
  cluster off and on again, asserting the reported instance status after
  each step.

  """
  master_ip = qa_config.GetMasterNode()["primary"]
  epo = ["gnt-cluster", "epo"]
  list_instance_status = "gnt-instance list --no-headers -o status"

  # Assert that OOB is unavailable for all nodes
  powered_lines = GetCommandOutput(
    master_ip, "gnt-node list --verbose --no-headers -o powered").splitlines()
  AssertEqual(compat.all(state == "(unavail)" for state in powered_lines),
              True)

  # --groups and --all are conflicting
  AssertCommand(epo + ["--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(epo + ["--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(epo + ["-f", master_ip], fail=True)

  # This shouldn't fail
  AssertCommand(epo + ["-f", "--all"])

  # All instances should have been stopped now; ERROR_down because the
  # instance is stopped but not recorded as such
  stopped_lines = GetCommandOutput(master_ip,
                                   list_instance_status).splitlines()
  AssertEqual(compat.all(state == "ERROR_down" for state in stopped_lines),
              True)

  # Now start everything again
  AssertCommand(epo + ["--on", "-f", "--all"])

  # All instances should have been started now
  started_lines = GetCommandOutput(master_ip,
                                   list_instance_status).splitlines()
  AssertEqual(compat.all(state == "running" for state in started_lines),
              True)
236
237
def TestClusterVerify():
  """gnt-cluster verify

  Runs the cluster verification followed by "gnt-cluster verify-disks"; both
  commands must succeed.

  """
  verify_disks = ["gnt-cluster", "verify-disks"]
  for command in (_CLUSTER_VERIFY, verify_disks):
    AssertCommand(command)
242
243
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command and requires it to succeed.

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
247
248
def TestDelay(node):
  """gnt-debug delay

  Runs a one second delay in three variants: with default options, with
  "--no-master", and with "--no-master" plus the given node passed via "-n".

  @param node: node entry whose "primary" value is passed to "-n"

  """
  delay = ["gnt-debug", "delay"]
  duration = ["1"]

  AssertCommand(delay + duration)
  AssertCommand(delay + ["--no-master"] + duration)
  AssertCommand(delay + ["--no-master", "-n", node["primary"]] + duration)
255
256
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates a logical volume "qa-test" in volume group "xenvg" and checks that
  cluster verification fails while the volume is not covered by the
  "--reserved-lvs" setting and passes while it is. The volume is removed and
  the setting is left empty at the end.

  """
  def _Reserve(patterns):
    """Builds the command setting the reserved LV patterns."""
    return ["gnt-cluster", "modify", "--reserved-lvs", patterns]

  # (command, whether it is expected to fail), executed in order
  steps = [
    (_CLUSTER_VERIFY, False),
    (_Reserve(""), False),
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    # The new volume is not reserved
    (_CLUSTER_VERIFY, True),
    (_Reserve("xenvg/qa-test,.*/other-test"), False),
    (_CLUSTER_VERIFY, False),
    (_Reserve(".*/qa-.*"), False),
    (_CLUSTER_VERIFY, False),
    # Dropping the reservation must make verification fail again
    (_Reserve(""), False),
    (_CLUSTER_VERIFY, True),
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (_CLUSTER_VERIFY, False),
    ]

  for command, must_fail in steps:
    AssertCommand(command, fail=must_fail)
275
276
def TestClusterModifyEmpty():
  """gnt-cluster modify

  Running the command without any option is expected to fail.

  """
  bare_modify = ["gnt-cluster", "modify"]
  AssertCommand(bare_modify, fail=True)
280
281
def TestClusterModifyDisk():
  """gnt-cluster modify -D

  Checks that every invalid disk parameter specification listed in
  L{_FAIL_PARAMS} is rejected.

  """
  modify_diskparams = ["gnt-cluster", "modify", "-D"]
  for bad_param in _FAIL_PARAMS:
    AssertCommand(modify_diskparams + [bad_param], fail=True)
286
287
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Sets valid and invalid values for the maxmem, minmem, vcpus and
  auto_balance backend parameters and greps the "gnt-cluster info" output to
  check the values in effect. Afterwards the backend parameters from the QA
  configuration, if any, are applied again.

  """
  def _Set(value, fail=False):
    """Runs "gnt-cluster modify -B" with the given value."""
    AssertCommand(["gnt-cluster", "modify", "-B", value], fail=fail)

  def _Expect(line):
    """Requires "gnt-cluster info" to contain the given (indented) line."""
    AssertCommand(["sh", "-c", "gnt-cluster info|grep '^ *%s$'" % line],
                  fail=False)

  # max/min mem
  _Set("maxmem=256")
  _Expect("maxmem: 256")
  _Set("minmem=256")
  _Expect("minmem: 256")
  # Invalid values are rejected and the previous values stay in place
  _Set("maxmem=a", fail=True)
  _Expect("maxmem: 256")
  _Set("minmem=a", fail=True)
  _Expect("minmem: 256")
  _Set("maxmem=128,minmem=128")
  _Expect("maxmem: 128")
  _Expect("minmem: 128")

  # vcpus
  _Set("vcpus=4")
  _Expect("vcpus: 4")
  _Set("vcpus=a", fail=True)
  _Set("vcpus=1")
  _Expect("vcpus: 1")

  # auto_balance
  _Set("auto_balance=False")
  _Expect("auto_balance: False")
  _Set("auto_balance=1", fail=True)
  _Set("auto_balance=True")
  _Expect("auto_balance: True")

  # redo the original-requested BE parameters, if any
  configured = qa_config.get("backend-parameters", "")
  if configured:
    AssertCommand(["gnt-cluster", "modify", "-B", configured])
322
323
def TestClusterInfo():
  """gnt-cluster info

  Runs the command and requires it to succeed.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
327
328
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command and requires it to succeed.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
332
333
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command and requires it to succeed.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
337
338
def TestClusterVersion():
  """gnt-cluster version

  Runs the command and requires it to succeed.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
342
343
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then renews the cryptographic material with a custom RAPI
  certificate, with a custom cluster domain secret and with all-new values.
  The RAPI certificate that was in place before the test is restored at the
  end.

  """
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         pathutils.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
    finally:
      # Only the uploaded copy is used from here on; close (and thereby
      # remove) the local temporary file right away instead of leaving it
      # to garbage collection
      fh.close()

    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--rapi-certificate=%s" % tmpcert])
    finally:
      AssertCommand(["rm", "-f", tmpcert])

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
    finally:
      # Same as above: the local copy is not needed after the upload
      cds_fh.close()

    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--cluster-domain-secret=%s" % tmpcds])
    finally:
      AssertCommand(["rm", "-f", tmpcds])

    # Normal case
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
404
405
def TestClusterBurnin():
  """Burnin

  Acquires up to "burnin-instances" instances (default 1), uploads the
  burnin tool to the master node and runs it with the options taken from the
  QA configuration. All acquired instances are released afterwards, whether
  or not the run succeeded.

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  acquired = []
  try:
    # Get as many instances as we need; running out is tolerated as long as
    # at least one instance could be acquired
    try:
      wanted = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(0, wanted):
        acquired.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if len(acquired) < 1:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Assemble the burnin command line
      burnin_cmd = [script]
      burnin_cmd.append("--os=%s" % qa_config.get("os"))
      burnin_cmd.append("--minmem-size=%s" %
                        qa_config.get(constants.BE_MINMEM))
      burnin_cmd.append("--maxmem-size=%s" %
                        qa_config.get(constants.BE_MAXMEM))
      burnin_cmd.append("--disk-size=%s" % ",".join(qa_config.get("disk")))
      burnin_cmd.append("--disk-growth=%s" %
                        ",".join(qa_config.get("disk-growth")))
      burnin_cmd.append("--disk-template=%s" % disk_template)

      if parallel:
        burnin_cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        burnin_cmd.append("--http-check")
      if do_rename:
        burnin_cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        burnin_cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        burnin_cmd.append("--no-reboot")

      for inst in acquired:
        burnin_cmd.append(inst["name"])

      # Run burnin
      AssertCommand(burnin_cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for inst in acquired:
      qa_config.ReleaseInstance(inst)
460
461
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Fails the master role over to another node and back to the original
  master. The second node is released again afterwards.

  """
  original_master = qa_config.GetMasterNode()
  standby = qa_config.AcquireNode(exclude=original_master)

  failover = ["gnt-cluster", "master-failover"]
  try:
    # Run the failover on the standby node first, then on the original
    # master to get back to the original state
    for new_master in (standby, original_master):
      AssertCommand(failover, node=new_master)
  finally:
    qa_config.ReleaseNode(standby)
474
475
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on the failover candidate, performs the
  failover, and asserts that the drain file no longer exists on either node
  afterwards, both right after the failover and after failing back to the
  original master.

  """
  drain_file = pathutils.JOB_QUEUE_DRAIN_FILE
  # "test -f" succeeds only while the drain file exists
  is_drained = ["test", "-f", drain_file]
  failover = ["gnt-cluster", "master-failover"]

  master = qa_config.GetMasterNode()
  standby = qa_config.AcquireNode(exclude=master)

  # Ensure queue is not drained
  for node in (master, standby):
    AssertCommand(is_drained, node=node, fail=True)

  # Drain queue on failover master
  AssertCommand(["touch", drain_file], node=standby)

  try:
    AssertCommand(is_drained, node=standby)
    AssertCommand(failover, node=standby)
    # The drain file must no longer exist on either node
    AssertCommand(is_drained, fail=True)
    AssertCommand(is_drained, node=standby, fail=True)

    # Back to original master node
    AssertCommand(failover, node=master)
  finally:
    qa_config.ReleaseNode(standby)

  AssertCommand(is_drained, fail=True)
  AssertCommand(is_drained, node=standby, fail=True)
504
505
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file with unique content to the master node, distributes it
  with "gnt-cluster copyfile" and checks its content on all nodes. The file
  is removed from all nodes afterwards.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    f.flush()
    f.seek(0)

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
  finally:
    # Only the uploaded copy is used from here on; close (and thereby
    # remove) the local temporary file instead of leaving it to garbage
    # collection
    f.close()

  try:
    # Copy file to all nodes
    AssertCommand(["gnt-cluster", "copyfile", testname])
    _CheckFileOnAllNodes(testname, uniqueid)
  finally:
    _RemoveFileFromAllNodes(testname)
526
527
def TestClusterCommand():
  """gnt-cluster command

  Uses "gnt-cluster command" to write a unique string into a file under /tmp
  and checks the file content on all nodes. The file is removed from all
  nodes afterwards.

  """
  marker = utils.NewUUID()
  output_path = "/tmp/gnt%s" % utils.NewUUID()

  # Shell snippet handed to "gnt-cluster command": echo the marker into the
  # output file
  echo_marker = utils.ShellQuoteArgs(["echo", "-n", marker])
  remote_snippet = "%s >%s" % (echo_marker, output_path)
  full_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_snippet])

  try:
    AssertCommand(full_cmd)
    _CheckFileOnAllNodes(output_path, marker)
  finally:
    _RemoveFileFromAllNodes(output_path)
541
542
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with "--yes-do-it" and requires it to succeed.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
546
547
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command and requires it to succeed.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)