Stricter check for OS modifications passed to OpClusterSetParams
[ganeti-local] / qa / qa_cluster.py
1 #
2 #
3
4 # Copyright (C) 2007, 2010, 2011, 2012 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Cluster related QA tests.
23
24 """
25
26 import tempfile
27 import os.path
28
29 from ganeti import constants
30 from ganeti import compat
31 from ganeti import utils
32
33 import qa_config
34 import qa_utils
35 import qa_error
36
37 from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
38
39
40 #: cluster verify command
41 _CLUSTER_VERIFY = ["gnt-cluster", "verify"]
42
43
def _RemoveFileFromAllNodes(filename):
  """Removes a file from all nodes.

  Runs C{rm -f} for the given path on every node listed under "nodes"
  in the QA configuration.

  @param filename: path of the file to delete

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
50
51
def _CheckFileOnAllNodes(filename, content):
  """Verifies the content of the given file on all nodes.

  The file is read with C{cat} on every node listed under "nodes" in
  the QA configuration and the output is compared with the expected
  content.

  @param filename: path of the file to read on each node
  @param content: expected output of C{cat} for that file

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    output = qa_utils.GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(output, content)
59
60
61 # data for testing failures due to bad keys/values for disk parameters
62 _FAIL_PARAMS = ["nonexistent:resync-rate=1",
63                 "drbd:nonexistent=1",
64                 "drbd:resync-rate=invalid",
65                 ]
66
67
def TestClusterInitDisk():
  """gnt-cluster init -D

  Checks that cluster initialization fails for each of the invalid disk
  parameter specifications in L{_FAIL_PARAMS}.

  """
  cluster_name = qa_config.get("name")
  for bad_param in _FAIL_PARAMS:
    init_cmd = ["gnt-cluster", "init", "-D", bad_param, cluster_name]
    AssertCommand(init_cmd, fail=True)
73
74
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs the RAPI users file on the master node, initializes the
  cluster using the options found in the QA configuration and afterwards
  applies the configured hypervisor, backend and OS parameters.

  @param rapi_user: user name written to the RAPI users file
  @param rapi_secret: secret written to the RAPI users file

  """
  master = qa_config.GetMasterNode()

  users_file = constants.RAPI_USERS_FILE
  rapi_dir = os.path.dirname(users_file)

  # The RAPI credentials are written to a local temporary file, uploaded
  # to the master node and moved to their final location there
  local_fh = tempfile.NamedTemporaryFile()
  try:
    local_fh.write("%s %s write\n" % (rapi_user, rapi_secret))
    local_fh.flush()

    uploaded = qa_utils.UploadFile(master["primary"], local_fh.name)
    try:
      AssertCommand(["mkdir", "-p", rapi_dir])
      AssertCommand(["mv", uploaded, users_file])
    finally:
      # Makes sure the uploaded copy is gone even if the move failed
      AssertCommand(["rm", "-f", uploaded])
  finally:
    local_fh.close()

  # Build the "gnt-cluster init" command line
  init_cmd = [
    "gnt-cluster", "init",
    "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4),
    ]

  # A configuration entry such as "ispec_mem_size_min" is turned into
  # "--specs-mem-size=min=<value>"; unset or zero entries are skipped
  for spec_type in ("mem-size", "disk-size", "disk-count", "cpu-count",
                    "nic-count"):
    cfg_prefix = "ispec_%s" % spec_type.replace("-", "_")
    for bound in ("min", "max", "std"):
      value = qa_config.get("%s_%s" % (cfg_prefix, bound), None)
      if value:
        init_cmd.append("--specs-%s=%s=%d" % (spec_type, bound, value))

  secondary_ip = master.get("secondary", None)
  if secondary_ip:
    init_cmd.append("--secondary-ip=%s" % secondary_ip)

  bridge = qa_config.get("bridge", None)
  if bridge:
    # The bridge is also used as master network device
    init_cmd.extend(["--bridge=%s" % bridge, "--master-netdev=%s" % bridge])

  enabled_hvs = qa_config.get("enabled-hypervisors", None)
  if enabled_hvs:
    init_cmd.append("--enabled-hypervisors=%s" % enabled_hvs)

  init_cmd.append(qa_config.get("name"))
  AssertCommand(init_cmd)

  # Hypervisor and backend parameters are applied with a single
  # "gnt-cluster modify" call, which is skipped if nothing is configured
  modify_args = []
  for hv_name, hv_params in qa_config.get("hypervisor-parameters",
                                          {}).items():
    modify_args.extend(["-H", "%s:%s" % (hv_name, hv_params)])

  be_params = qa_config.get("backend-parameters", "")
  if be_params:
    modify_args.extend(["-B", be_params])

  if modify_args:
    AssertCommand(["gnt-cluster", "modify"] + modify_args)

  # OS parameters, one "gnt-os modify" call per operating system
  for os_name, os_params in qa_config.get("os-parameters", {}).items():
    AssertCommand(["gnt-os", "modify", "-O", os_params, os_name])

  # OS hypervisor parameters, one call per OS and hypervisor
  for os_name, per_hv in qa_config.get("os-hvp", {}).items():
    for hv_name, hv_params in per_hv.items():
      AssertCommand(["gnt-os", "modify",
                     "-H", "%s:%s" % (hv_name, hv_params), os_name])
149
150
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the name configured as "rename" and back to
  its original name, verifying the cluster after each rename. If no
  "rename" entry is configured an error message is printed and nothing
  else is done.

  """
  rename_cmd = ["gnt-cluster", "rename", "-f"]

  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  # First to the temporary name, then back to the original one
  for new_name in (rename_target, original_name):
    AssertCommand(rename_cmd + [new_name])
    AssertCommand(_CLUSTER_VERIFY)
168
169
def TestClusterOob():
  """out-of-band framework

  Sets the C{oob_program} node parameter to a missing program, to a
  program without execute permission and to an executable one; cluster
  verification is expected to fail in the first two cases and to pass
  in the last one. The parameter is cleared again at the end.

  """
  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

  set_node_params = ["gnt-cluster", "modify", "--node-parameters"]
  copy_to_nodes = ["gnt-cluster", "copyfile", oob_path_exists]

  AssertCommand(_CLUSTER_VERIFY)

  # A program which doesn't exist must make verification fail
  oob_missing = ("oob_program=/tmp/ganeti-qa-oob-does-not-exist-%s" %
                 utils.NewUUID())
  AssertCommand(set_node_params + [oob_missing])
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  # Create the program without execute permission and copy it around
  AssertCommand(["touch", oob_path_exists])
  AssertCommand(["chmod", "0400", oob_path_exists])
  AssertCommand(copy_to_nodes)

  try:
    AssertCommand(set_node_params + ["oob_program=%s" % oob_path_exists])

    # With mode 0400 verification is still expected to fail
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    AssertCommand(["chmod", "0500", oob_path_exists])
    AssertCommand(copy_to_nodes)

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])

  # Reset the parameter to an empty value
  AssertCommand(set_node_params + ["oob_program="])
200
201
def TestClusterEpo():
  """gnt-cluster epo

  Checks that invalid argument combinations of C{gnt-cluster epo} are
  rejected, that "epo -f --all" leaves every instance in the
  "ADMIN_down" state and that "epo --on -f --all" brings every instance
  back to "running".

  """
  master_name = qa_config.GetMasterNode()["primary"]

  def _AssertAllLinesEqual(list_cmd, expected):
    # Runs the command on the master node and compares each output line
    lines = GetCommandOutput(master_name, list_cmd).splitlines()
    AssertEqual(compat.all(line == expected for line in lines), True)

  epo_cmd = ["gnt-cluster", "epo"]
  instance_status_cmd = "gnt-instance list --no-headers -o status"

  # Assert that OOB is unavailable for all nodes
  _AssertAllLinesEqual("gnt-node list --verbose --no-headers -o powered",
                       "(unavail)")

  rejected = [
    # Conflicting
    ["--groups", "--all"],
    # --all doesn't expect arguments
    ["--all", "some_arg"],
    # Unless --all is given master is not allowed to be in the list
    ["-f", master_name],
    ]
  for args in rejected:
    AssertCommand(epo_cmd + args, fail=True)

  # This shouldn't fail
  AssertCommand(epo_cmd + ["-f", "--all"])

  # All instances should have been stopped now
  _AssertAllLinesEqual(instance_status_cmd, "ADMIN_down")

  # Now start everything again
  AssertCommand(epo_cmd + ["--on", "-f", "--all"])

  # All instances should have been started now
  _AssertAllLinesEqual(instance_status_cmd, "running")
238
239
def TestClusterVerify():
  """gnt-cluster verify

  Runs the cluster verification followed by the disk verification; both
  commands are expected to succeed.

  """
  for cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(cmd)
244
245
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command and expects it to succeed.

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
249
250
def TestDelay(node):
  """gnt-debug delay

  Runs the delay command with a duration argument of "1" in three
  variants: without options, with "--no-master" and with "--no-master"
  plus the given node passed via "-n".

  @param node: node definition whose "primary" entry is passed to "-n"

  """
  delay_cmd = ["gnt-debug", "delay"]
  duration = ["1"]

  AssertCommand(delay_cmd + duration)
  AssertCommand(delay_cmd + ["--no-master"] + duration)

  on_node = ["--no-master", "-n", node["primary"]]
  AssertCommand(delay_cmd + on_node + duration)
257
258
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates the logical volume "xenvg/qa-test" and checks the outcome of
  cluster verification for several values of "--reserved-lvs":
  verification is expected to fail while the list is empty and to pass
  once a pattern covering the volume is set. The volume is removed at
  the end.

  """
  def _SetReservedLvs(value):
    # Command setting the list of reserved logical volumes
    return ["gnt-cluster", "modify", "--reserved-lvs", value]

  steps = [
    (_CLUSTER_VERIFY, False),
    (_SetReservedLvs(""), False),
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    # Volume exists, nothing reserved
    (_CLUSTER_VERIFY, True),
    (_SetReservedLvs("xenvg/qa-test,.*/other-test"), False),
    (_CLUSTER_VERIFY, False),
    (_SetReservedLvs(".*/qa-.*"), False),
    (_CLUSTER_VERIFY, False),
    # Reservation dropped again while the volume still exists
    (_SetReservedLvs(""), False),
    (_CLUSTER_VERIFY, True),
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (_CLUSTER_VERIFY, False),
    ]

  for cmd, should_fail in steps:
    AssertCommand(cmd, fail=should_fail)
277
278
def TestClusterModifyEmpty():
  """gnt-cluster modify

  Running the command without any option is expected to fail.

  """
  no_options_cmd = ["gnt-cluster", "modify"]
  AssertCommand(no_options_cmd, fail=True)
282
283
def TestClusterModifyDisk():
  """gnt-cluster modify -D

  Checks that the modification fails for each of the invalid disk
  parameter specifications in L{_FAIL_PARAMS}.

  """
  modify_disk = ["gnt-cluster", "modify", "-D"]
  for bad_param in _FAIL_PARAMS:
    AssertCommand(modify_disk + [bad_param], fail=True)
288
289
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Changes the backend parameters maxmem, minmem, vcpus and auto_balance
  and checks via the output of "gnt-cluster info" that valid values are
  applied; setting invalid values is expected to fail. Afterwards the
  backend parameters from the QA configuration, if any, are applied
  again.

  """
  def _Modify(be_params):
    # Command setting the given backend parameters
    return ["gnt-cluster", "modify", "-B", be_params]

  def _InfoShows(entry):
    # Command succeeding if "gnt-cluster info" has exactly this entry,
    # optionally preceded by spaces, on one line
    return ["sh", "-c", "gnt-cluster info|grep '^ *%s$'" % entry]

  steps = [
    # max/min mem
    (_Modify("maxmem=256"), False),
    (_InfoShows("maxmem: 256"), False),
    (_Modify("minmem=256"), False),
    (_InfoShows("minmem: 256"), False),
    (_Modify("maxmem=a"), True),
    (_InfoShows("maxmem: 256"), False),
    (_Modify("minmem=a"), True),
    (_InfoShows("minmem: 256"), False),
    (_Modify("maxmem=128,minmem=128"), False),
    (_InfoShows("maxmem: 128"), False),
    (_InfoShows("minmem: 128"), False),
    # vcpus
    (_Modify("vcpus=4"), False),
    (_InfoShows("vcpus: 4"), False),
    (_Modify("vcpus=a"), True),
    (_Modify("vcpus=1"), False),
    (_InfoShows("vcpus: 1"), False),
    # auto_balance
    (_Modify("auto_balance=False"), False),
    (_InfoShows("auto_balance: False"), False),
    (_Modify("auto_balance=1"), True),
    (_Modify("auto_balance=True"), False),
    (_InfoShows("auto_balance: True"), False),
    ]

  for cmd, should_fail in steps:
    AssertCommand(cmd, fail=should_fail)

  # redo the original-requested BE parameters, if any
  configured = qa_config.get("backend-parameters", "")
  if configured:
    AssertCommand(_Modify(configured))
324
325
def TestClusterInfo():
  """gnt-cluster info

  Runs the command and expects it to succeed.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
329
330
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command and expects it to succeed.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
334
335
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command and expects it to succeed.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
339
340
def TestClusterVersion():
  """gnt-cluster version

  Runs the command and expects it to succeed.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
344
345
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then renews the cryptographic material using a custom RAPI
  certificate, a custom cluster domain secret and finally newly
  generated values for everything. The RAPI certificate backed up at
  the beginning is restored at the end.

  """
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         constants.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
    finally:
      # The local file is not needed after the upload; close it right
      # away instead of leaving that to garbage collection
      fh.close()

    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--rapi-certificate=%s" % tmpcert])
    finally:
      AssertCommand(["rm", "-f", tmpcert])

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
    finally:
      # Same as above, the local copy is only needed for the upload
      cds_fh.close()

    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--cluster-domain-secret=%s" % tmpcds])
    finally:
      AssertCommand(["rm", "-f", tmpcds])

    # Normal case
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
406
407
def TestClusterBurnin():
  """Burnin

  Uploads the burnin script to the master node and runs it against as
  many instances as configured (or as many as could be acquired, with
  one being the minimum). Command line options are derived from the QA
  configuration. The uploaded script is removed and all instances are
  released afterwards.

  @raise qa_error.Error: if not even one instance could be acquired

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  instances = []
  try:
    try:
      wanted = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(wanted):
        instances.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if not instances:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      cmd = [
        script,
        "--os=%s" % qa_config.get("os"),
        "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
        "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
        "--disk-size=%s" % ",".join(qa_config.get("disk")),
        "--disk-growth=%s" % ",".join(qa_config.get("disk-growth")),
        "--disk-template=%s" % disk_template,
        ]

      if parallel:
        cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        cmd.append("--http-check")
      if do_rename:
        cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        cmd.append("--no-reboot")

      # Instance names come last on the command line
      cmd.extend([inst["name"] for inst in instances])
      AssertCommand(cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for inst in instances:
      qa_config.ReleaseInstance(inst)
462
463
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs the failover command on another node and then on the original
  master node again. The other node is released afterwards in any case.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  failover_cmd = ["gnt-cluster", "master-failover"]
  try:
    # First to the other node, then back to the original master node
    for target in (failovermaster, master):
      AssertCommand(failover_cmd, node=target)
  finally:
    qa_config.ReleaseNode(failovermaster)
476
477
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on the failover node, runs the
  failover command there and checks that the drain file is gone
  afterwards. The failover command is then run on the original master
  node again and the absence of the drain file is checked once more.

  """
  drain_file = constants.JOB_QUEUE_DRAIN_FILE
  drain_check = ["test", "-f", drain_file]
  failover_cmd = ["gnt-cluster", "master-failover"]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  def _AssertNotDrained():
    # The check without "node" runs wherever AssertCommand runs commands
    # by default (presumably the master node; not visible here)
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

  # Ensure queue is not drained
  for candidate in (master, failovermaster):
    AssertCommand(drain_check, node=candidate, fail=True)

  # Drain queue on failover master
  AssertCommand(["touch", drain_file], node=failovermaster)

  try:
    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(failover_cmd, node=failovermaster)
    _AssertNotDrained()

    # Back to original master node
    AssertCommand(failover_cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)

  _AssertNotDrained()
506
507
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file with unique content to the master node, copies it to
  all nodes using the command under test and verifies its content on
  each of them. The file is removed from all nodes afterwards.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    f.flush()

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
  finally:
    # Only the file name is handed to the upload, hence the local file
    # can be closed as soon as the upload is done
    f.close()

  try:
    # Copy file to all nodes
    AssertCommand(["gnt-cluster", "copyfile", testname])
    _CheckFileOnAllNodes(testname, uniqueid)
  finally:
    _RemoveFileFromAllNodes(testname)
528
529
def TestClusterCommand():
  """gnt-cluster command

  Uses the command under test to write a unique string to a file below
  "/tmp", verifies the file's content on all nodes and removes the file
  from all nodes afterwards.

  """
  uniqueid = utils.NewUUID()
  out_path = "/tmp/gnt%s" % utils.NewUUID()

  # The echo command is quoted on its own and combined with the output
  # redirection, which is passed as a single argument
  echo_cmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  remote_cmd = "%s >%s" % (echo_cmd, out_path)
  cluster_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_cmd])

  try:
    AssertCommand(cluster_cmd)
    _CheckFileOnAllNodes(out_path, uniqueid)
  finally:
    _RemoveFileFromAllNodes(out_path)
543
544
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with "--yes-do-it" and expects it to succeed.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
548
549
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command and expects it to succeed.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)