Allow units in ipolicy disk/mem CLI changes
[ganeti-local] / qa / qa_cluster.py
1 #
2 #
3
4 # Copyright (C) 2007, 2010, 2011, 2012 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Cluster related QA tests.
23
24 """
25
26 import tempfile
27 import os.path
28
29 from ganeti import constants
30 from ganeti import compat
31 from ganeti import utils
32
33 import qa_config
34 import qa_utils
35 import qa_error
36
37 from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
38
39
40 #: cluster verify command
41 _CLUSTER_VERIFY = ["gnt-cluster", "verify"]
42
43
def _RemoveFileFromAllNodes(filename):
  """Deletes a file on every node listed in the QA configuration.

  @param filename: path passed to C{rm -f} on each node

  """
  all_nodes = qa_config.get("nodes")
  for target in all_nodes:
    AssertCommand(["rm", "-f", filename], node=target)
50
51
def _CheckFileOnAllNodes(filename, content):
  """Asserts that a file has the expected content on every node.

  @param filename: path of the file, read with C{cat} on each node
  @param content: value the command output is compared against

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    actual = GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(actual, content)
59
60
61 # data for testing failures due to bad keys/values for disk parameters
62 _FAIL_PARAMS = ["nonexistent:resync-rate=1",
63                 "drbd:nonexistent=1",
64                 "drbd:resync-rate=invalid",
65                 ]
66
67
def TestClusterInitDisk():
  """gnt-cluster init -D"""
  cluster_name = qa_config.get("name")
  # Each malformed disk parameter specification must make "init" fail
  for bad_param in _FAIL_PARAMS:
    init_cmd = ["gnt-cluster", "init", "-D", bad_param, cluster_name]
    AssertCommand(init_cmd, fail=True)
73
74
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init"""
  # Installs the RAPI users file, runs "gnt-cluster init" with the options
  # derived from the QA configuration and afterwards applies the configured
  # hypervisor, backend and OS parameters.
  #
  # rapi_user, rapi_secret: written as a "<user> <secret> write" line into
  # the RAPI users file
  master = qa_config.GetMasterNode()

  rapi_dir = os.path.dirname(constants.RAPI_USERS_FILE)

  # First create the RAPI credentials: write them to a local temporary file,
  # upload that file and move the uploaded copy into place
  fh = tempfile.NamedTemporaryFile()
  try:
    fh.write("%s %s write\n" % (rapi_user, rapi_secret))
    fh.flush()

    tmpru = qa_utils.UploadFile(master["primary"], fh.name)
    try:
      AssertCommand(["mkdir", "-p", rapi_dir])
      AssertCommand(["mv", tmpru, constants.RAPI_USERS_FILE])
    finally:
      # Nothing left to remove after a successful "mv"; otherwise this
      # cleans up the uploaded copy
      AssertCommand(["rm", "-f", tmpru])
  finally:
    fh.close()

  # Initialize cluster
  cmd = ["gnt-cluster", "init"]

  cmd.append("--primary-ip-version=%d" %
             qa_config.get("primary_ip_version", 4))

  # Instance policy specs are read from "ispec_<type>_<min|max|std>" entries
  for spec_type in ("mem-size", "disk-size", "disk-count", "cpu-count",
                    "nic-count"):
    for spec_val in ("min", "max", "std"):
      spec = qa_config.get("ispec_%s_%s" %
                           (spec_type.replace("-", "_"), spec_val), None)
      if spec:
        # "%s" instead of "%d": integers are formatted identically, while a
        # value given as a string with a unit suffix (e.g. "1G") no longer
        # raises TypeError
        cmd.append("--specs-%s=%s=%s" % (spec_type, spec_val, spec))

  if master.get("secondary", None):
    cmd.append("--secondary-ip=%s" % master["secondary"])

  # The configured bridge is also used as the master network device
  bridge = qa_config.get("bridge", None)
  if bridge:
    cmd.append("--bridge=%s" % bridge)
    cmd.append("--master-netdev=%s" % bridge)

  htype = qa_config.get("enabled-hypervisors", None)
  if htype:
    cmd.append("--enabled-hypervisors=%s" % htype)

  cmd.append(qa_config.get("name"))
  AssertCommand(cmd)

  cmd = ["gnt-cluster", "modify"]

  # hypervisor parameter modifications
  hvp = qa_config.get("hypervisor-parameters", {})
  for k, v in hvp.items():
    cmd.extend(["-H", "%s:%s" % (k, v)])
  # backend parameter modifications
  bep = qa_config.get("backend-parameters", "")
  if bep:
    cmd.extend(["-B", bep])

  # Only run "modify" if at least one option was added above
  if len(cmd) > 2:
    AssertCommand(cmd)

  # OS parameters
  osp = qa_config.get("os-parameters", {})
  for k, v in osp.items():
    AssertCommand(["gnt-os", "modify", "-O", v, k])

  # OS hypervisor parameters
  os_hvp = qa_config.get("os-hvp", {})
  for os_name in os_hvp:
    # Distinct name so the cluster-level "hvp" dictionary is not shadowed
    for hv, hv_params in os_hvp[os_name].items():
      AssertCommand(["gnt-os", "modify", "-H", "%s:%s" % (hv, hv_params),
                     os_name])
149
150
def TestClusterRename():
  """gnt-cluster rename"""
  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    # Without a configured target name there is nothing to test
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  rename_cmd = ["gnt-cluster", "rename", "-f"]

  # Rename to the target name and back again, verifying after each rename
  for new_name in (rename_target, original_name):
    AssertCommand(rename_cmd + [new_name])
    AssertCommand(_CLUSTER_VERIFY)
168
169
def TestClusterOob():
  """out-of-band framework"""
  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()
  set_nodeparams = ["gnt-cluster", "modify", "--node-parameters"]
  distribute = ["gnt-cluster", "copyfile", oob_path_exists]

  AssertCommand(_CLUSTER_VERIFY)

  # Point oob_program at a path that is never created; verify must fail
  missing_program = ("oob_program=/tmp/ganeti-qa-oob-does-not-exist-%s" %
                     utils.NewUUID())
  AssertCommand(set_nodeparams + [missing_program])
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  # Create the program file with mode 0400 and copy it to the nodes
  AssertCommand(["touch", oob_path_exists])
  AssertCommand(["chmod", "0400", oob_path_exists])
  AssertCommand(distribute)

  try:
    AssertCommand(set_nodeparams + ["oob_program=%s" % oob_path_exists])

    # With mode 0400 verify is still expected to fail
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # With mode 0500, redistributed, verify is expected to succeed
    AssertCommand(["chmod", "0500", oob_path_exists])
    AssertCommand(distribute)

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])

  # Reset the node parameter to an empty value
  AssertCommand(set_nodeparams + ["oob_program="])
200
201
def TestClusterEpo():
  """gnt-cluster epo"""
  master = qa_config.GetMasterNode()
  epo_cmd = ["gnt-cluster", "epo"]
  status_query = "gnt-instance list --no-headers -o status"

  # Assert that OOB is unavailable for all nodes
  powered_query = "gnt-node list --verbose --no-headers -o powered"
  powered = GetCommandOutput(master["primary"], powered_query).splitlines()
  AssertEqual(compat.all([value == "(unavail)" for value in powered]), True)

  # Conflicting
  AssertCommand(epo_cmd + ["--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(epo_cmd + ["--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(epo_cmd + ["-f", master["primary"]], fail=True)

  # This shouldn't fail
  AssertCommand(epo_cmd + ["-f", "--all"])

  # All instances should have been stopped now
  stopped = GetCommandOutput(master["primary"], status_query).splitlines()
  # ERROR_down because the instance is stopped but not recorded as such
  AssertEqual(compat.all([state == "ERROR_down" for state in stopped]), True)

  # Now start everything again
  AssertCommand(epo_cmd + ["--on", "-f", "--all"])

  # All instances should have been started now
  started = GetCommandOutput(master["primary"], status_query).splitlines()
  AssertEqual(compat.all([state == "running" for state in started]), True)
239
240
def TestClusterVerify():
  """gnt-cluster verify"""
  # Run "verify" first, followed by "verify-disks"
  for verify_cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(verify_cmd)
245
246
def TestJobqueue():
  """gnt-debug test-jobqueue"""
  # The command is only required to exit successfully
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
250
251
def TestDelay(node):
  """gnt-debug delay"""
  # node: node dictionary; its "primary" entry is passed to "-n"
  delay_cmd = ["gnt-debug", "delay"]

  # Default invocation with a duration argument of "1"
  AssertCommand(delay_cmd + ["1"])
  # Same duration with "--no-master"
  AssertCommand(delay_cmd + ["--no-master", "1"])
  # "--no-master" combined with an explicitly named node
  AssertCommand(delay_cmd + ["--no-master", "-n", node["primary"], "1"])
258
259
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs"""
  def _SetReserved(patterns):
    """Builds the command setting the reserved LVs to C{patterns}.

    """
    return ["gnt-cluster", "modify", "--reserved-lvs", patterns]

  # Pairs of (command, whether the command is expected to fail)
  steps = [
    (_CLUSTER_VERIFY, False),
    (_SetReserved(""), False),
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    # Verify is expected to fail while the new volume is not reserved
    (_CLUSTER_VERIFY, True),
    (_SetReserved("xenvg/qa-test,.*/other-test"), False),
    (_CLUSTER_VERIFY, False),
    (_SetReserved(".*/qa-.*"), False),
    (_CLUSTER_VERIFY, False),
    # Clearing the reserved list makes verify fail again
    (_SetReserved(""), False),
    (_CLUSTER_VERIFY, True),
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (_CLUSTER_VERIFY, False),
    ]

  for step_cmd, must_fail in steps:
    AssertCommand(step_cmd, fail=must_fail)
278
279
def TestClusterModifyEmpty():
  """gnt-cluster modify"""
  # "modify" without any option is expected to fail
  bare_modify = ["gnt-cluster", "modify"]
  AssertCommand(bare_modify, fail=True)
283
284
def TestClusterModifyDisk():
  """gnt-cluster modify -D"""
  # Each malformed disk parameter specification must make "modify" fail
  for bad_param in _FAIL_PARAMS:
    modify_cmd = ["gnt-cluster", "modify", "-D", bad_param]
    AssertCommand(modify_cmd, fail=True)
289
290
def TestClusterModifyBe():
  """gnt-cluster modify -B"""
  def _Modify(beparams, fail):
    """Runs "gnt-cluster modify -B" with the given parameter string.

    """
    AssertCommand(["gnt-cluster", "modify", "-B", beparams], fail=fail)

  def _CheckInfo(name, value):
    """Greps the "gnt-cluster info" output for a "name: value" line.

    """
    pattern = "gnt-cluster info|grep '^ *%s: %s$'" % (name, value)
    AssertCommand(["sh", "-c", pattern], fail=False)

  # max/min mem
  _Modify("maxmem=256", False)
  _CheckInfo("maxmem", "256")
  _Modify("minmem=256", False)
  _CheckInfo("minmem", "256")
  # Invalid values must be rejected and leave the previous value in place
  _Modify("maxmem=a", True)
  _CheckInfo("maxmem", "256")
  _Modify("minmem=a", True)
  _CheckInfo("minmem", "256")
  _Modify("maxmem=128,minmem=128", False)
  _CheckInfo("maxmem", "128")
  _CheckInfo("minmem", "128")

  # vcpus
  _Modify("vcpus=4", False)
  _CheckInfo("vcpus", "4")
  _Modify("vcpus=a", True)
  _Modify("vcpus=1", False)
  _CheckInfo("vcpus", "1")

  # auto_balance
  _Modify("auto_balance=False", False)
  _CheckInfo("auto_balance", "False")
  _Modify("auto_balance=1", True)
  _Modify("auto_balance=True", False)
  _CheckInfo("auto_balance", "True")

  # redo the original-requested BE parameters, if any
  configured_bep = qa_config.get("backend-parameters", "")
  if configured_bep:
    AssertCommand(["gnt-cluster", "modify", "-B", configured_bep])
325
326
def TestClusterInfo():
  """gnt-cluster info"""
  # The command is only required to exit successfully
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
330
331
def TestClusterRedistConf():
  """gnt-cluster redist-conf"""
  # The command is only required to exit successfully
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
335
336
def TestClusterGetmaster():
  """gnt-cluster getmaster"""
  # The command is only required to exit successfully
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
340
341
def TestClusterVersion():
  """gnt-cluster version"""
  # The command is only required to exit successfully
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
345
346
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto"""
  # Exercises "gnt-cluster renew-crypto": option combinations that must be
  # rejected, a custom RAPI certificate, a custom cluster domain secret and
  # regeneration of everything. The RAPI certificate backed up at the start
  # is restored at the end.
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         constants.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
      try:
        AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                       "--rapi-certificate=%s" % tmpcert])
      finally:
        AssertCommand(["rm", "-f", tmpcert])
    finally:
      # Close explicitly so the local temporary file does not linger until
      # the object is garbage-collected
      fh.close()

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
      try:
        AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                       "--cluster-domain-secret=%s" % tmpcds])
      finally:
        AssertCommand(["rm", "-f", tmpcds])
    finally:
      # Same reasoning as for the certificate file above
      cds_fh.close()

    # Normal case
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
407
408
def TestClusterBurnin():
  """Burnin"""
  master = qa_config.GetMasterNode()

  # Burnin behaviour is controlled through the "options" configuration entry
  options = qa_config.get("options", {})
  template = options.get("burnin-disk-template", "drbd")
  in_parallel = options.get("burnin-in-parallel", False)
  http_check = options.get("burnin-check-instances", False)
  rename_to = options.get("burnin-rename", "")
  with_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  acquired = []
  try:
    try:
      wanted = options.get("burnin-instances", 1)
      for _ in range(wanted):
        acquired.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if not acquired:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      burnin_cmd = [
        script,
        "--os=%s" % qa_config.get("os"),
        "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
        "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
        "--disk-size=%s" % ",".join(qa_config.get("disk")),
        "--disk-growth=%s" % ",".join(qa_config.get("disk-growth")),
        "--disk-template=%s" % template,
        ]
      if in_parallel:
        burnin_cmd.extend(["--parallel", "--early-release"])
      if http_check:
        burnin_cmd.append("--http-check")
      if rename_to:
        burnin_cmd.append("--rename=%s" % rename_to)
      if with_reboot:
        burnin_cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        burnin_cmd.append("--no-reboot")
      # The names of the acquired instances are the positional arguments
      for instance in acquired:
        burnin_cmd.append(instance["name"])
      AssertCommand(burnin_cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    # Hand back whatever was acquired, even after a failure
    for instance in acquired:
      qa_config.ReleaseInstance(instance)
463
464
def TestClusterMasterFailover():
  """gnt-cluster master-failover"""
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  failover_cmd = ["gnt-cluster", "master-failover"]
  try:
    # Run the failover on the acquired node first, then on the original
    # master node to switch back
    for new_master in (failovermaster, master):
      AssertCommand(failover_cmd, node=new_master)
  finally:
    qa_config.ReleaseNode(failovermaster)
477
478
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue"""
  # Succeeds when "test -f" on the job queue drain file succeeds
  drain_check = ["test", "-f", constants.JOB_QUEUE_DRAIN_FILE]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  cmd = ["gnt-cluster", "master-failover"]
  # All steps after acquiring the node run inside the "try" block, so the
  # node is released even if one of the preparatory commands fails
  try:
    # Ensure queue is not drained
    for node in [master, failovermaster]:
      AssertCommand(drain_check, node=node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", constants.JOB_QUEUE_DRAIN_FILE],
                  node=failovermaster)

    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)
    # After the failover the drain file must be absent, both for the default
    # node of AssertCommand and on the failover master
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)

  # The drain file must still be absent after switching back
  AssertCommand(drain_check, fail=True)
  AssertCommand(drain_check, node=failovermaster, fail=True)
507
508
def TestClusterCopyfile():
  """gnt-cluster copyfile"""
  # Uploads a file containing a fresh UUID, distributes it with
  # "gnt-cluster copyfile" and checks its content on all nodes
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    f.flush()
    f.seek(0)

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
    try:
      # Copy file to all nodes
      AssertCommand(["gnt-cluster", "copyfile", testname])
      _CheckFileOnAllNodes(testname, uniqueid)
    finally:
      _RemoveFileFromAllNodes(testname)
  finally:
    # Close explicitly so the local temporary file does not linger until the
    # object is garbage-collected
    f.close()
529
530
def TestClusterCommand():
  """gnt-cluster command"""
  uniqueid = utils.NewUUID()
  rfile = "/tmp/gnt%s" % utils.NewUUID()

  # The echo command is quoted on its own first; together with the output
  # redirection it forms a single argument of "gnt-cluster command"
  echo_cmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  redirected = "%s >%s" % (echo_cmd, rfile)
  cluster_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", redirected])

  try:
    AssertCommand(cluster_cmd)
    # Every node must now have the file containing the unique ID
    _CheckFileOnAllNodes(rfile, uniqueid)
  finally:
    _RemoveFileFromAllNodes(rfile)
544
545
def TestClusterDestroy():
  """gnt-cluster destroy"""
  # The command is only required to exit successfully
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
549
550
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes"""
  # The command is only required to exit successfully
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)