QA: init cluster changes ispecs
[ganeti-local] / qa / qa_cluster.py
1 #
2 #
3
4 # Copyright (C) 2007, 2010, 2011 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Cluster related QA tests.
23
24 """
25
26 import tempfile
27 import os.path
28
29 from ganeti import constants
30 from ganeti import compat
31 from ganeti import utils
32
33 import qa_config
34 import qa_utils
35 import qa_error
36
37 from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
38
39
#: cluster verify command; the argument list is shared by every test in this
#: module that runs "gnt-cluster verify", so it must not be modified in place
_CLUSTER_VERIFY = ["gnt-cluster", "verify"]
42
43
def _RemoveFileFromAllNodes(filename):
  """Deletes the given file on every node of the QA configuration.

  The file is removed with C{rm -f}, one command per configured node.

  @param filename: path of the file to delete on each node

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
50
51
def _CheckFileOnAllNodes(filename, content):
  """Asserts that a file holds the expected content on every node.

  The file is read with C{cat} on the primary address of each configured
  node and the command output is compared against C{content}.

  @param filename: path of the file to read on each node
  @param content: output expected from C{cat} for that file

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    output = qa_utils.GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(output, content)
59
60
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs the RAPI credentials on the master node, checks that both
  C{gnt-cluster init} and C{gnt-cluster modify} reject bad disk parameters,
  initializes the cluster and then applies the hypervisor, backend, OS and
  per-OS hypervisor parameters found in the QA configuration.

  @param rapi_user: user name written to the RAPI users file
  @param rapi_secret: password written to the RAPI users file

  """
  # data for testing failures due to bad keys/values for disk parameters;
  # these are only the option values, "-D" is passed as a separate argument
  # so that the value reaches the option parser without a leading space
  fail_params = ("nonexistent:resync-rate=1",
                 "drbd:nonexistent=1",
                 "drbd:resync-rate=invalid")

  master = qa_config.GetMasterNode()
  cluster_name = qa_config.get("name")

  rapi_dir = os.path.dirname(constants.RAPI_USERS_FILE)

  # First create the RAPI credentials
  fh = tempfile.NamedTemporaryFile()
  try:
    fh.write("%s %s write\n" % (rapi_user, rapi_secret))
    fh.flush()

    tmpru = qa_utils.UploadFile(master["primary"], fh.name)
    try:
      AssertCommand(["mkdir", "-p", rapi_dir])
      AssertCommand(["mv", tmpru, constants.RAPI_USERS_FILE])
    finally:
      # Makes sure the uploaded copy is gone even if it could not be moved
      AssertCommand(["rm", "-f", tmpru])
  finally:
    fh.close()

  # Initialize cluster
  cmd = ["gnt-cluster", "init"]

  cmd.append("--primary-ip-version=%d" %
             qa_config.get("primary_ip_version", 4))
  cmd.append("--specs-mem-size=max=1024")
  cmd.append("--specs-disk-size=min=512")

  if master.get("secondary", None):
    cmd.append("--secondary-ip=%s" % master["secondary"])

  bridge = qa_config.get("bridge", None)
  if bridge:
    cmd.append("--bridge=%s" % bridge)
    cmd.append("--master-netdev=%s" % bridge)

  htype = qa_config.get("enabled-hypervisors", None)
  if htype:
    cmd.append("--enabled-hypervisors=%s" % htype)

  # test gnt-cluster init failures due to bad keys/values in disk parameters
  for param in fail_params:
    AssertCommand(cmd + ["-D", param, cluster_name], fail=True)

  AssertCommand(cmd + [cluster_name])

  modify_cmd = ["gnt-cluster", "modify"]
  # test gnt-cluster modify failures due to bad keys/values in disk parameters
  for param in fail_params:
    AssertCommand(modify_cmd + ["-D", param], fail=True)

  # hypervisor parameter modifications
  hvp = qa_config.get("hypervisor-parameters", {})
  for k, v in hvp.items():
    modify_cmd.extend(["-H", "%s:%s" % (k, v)])
  # backend parameter modifications
  bep = qa_config.get("backend-parameters", "")
  if bep:
    modify_cmd.extend(["-B", bep])

  # Only run "modify" if at least one option was added
  if len(modify_cmd) > 2:
    AssertCommand(modify_cmd)

  # OS parameters
  osp = qa_config.get("os-parameters", {})
  for k, v in osp.items():
    AssertCommand(["gnt-os", "modify", "-O", v, k])

  # OS hypervisor parameters
  os_hvp = qa_config.get("os-hvp", {})
  for os_name in os_hvp:
    for hv, hv_params in os_hvp[os_name].items():
      AssertCommand(["gnt-os", "modify", "-H", "%s:%s" % (hv, hv_params),
                     os_name])
146
147
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the name configured under C{rename} and then back
  to its original name, running cluster verification after each rename. If
  no rename target is configured, a message is printed and nothing is run.

  """
  current_name = qa_config.get("name")
  new_name = qa_config.get("rename", None)
  if new_name is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  rename_cmd = ["gnt-cluster", "rename", "-f"]
  # Rename away first, then restore the original name
  for name in (new_name, current_name):
    AssertCommand(rename_cmd + [name])
    AssertCommand(_CLUSTER_VERIFY)
165
166
def TestClusterOob():
  """out-of-band framework

  Sets the C{oob_program} node parameter to different paths and checks
  whether cluster verification passes or fails in each case. The parameter
  is reset to an empty value at the end.

  """
  def _SetOobProgram(path):
    """Sets the cluster-wide C{oob_program} node parameter.

    """
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program=%s" % path])

  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()
  distribute_cmd = ["gnt-cluster", "copyfile", oob_path_exists]

  # Verification is expected to fail with a program path that was never
  # created
  AssertCommand(_CLUSTER_VERIFY)
  _SetOobProgram("/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID())
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  # Create the file with mode 0400 and copy it to the nodes
  AssertCommand(["touch", oob_path_exists])
  AssertCommand(["chmod", "0400", oob_path_exists])
  AssertCommand(distribute_cmd)

  try:
    # The file exists now, but verification must still fail with mode 0400
    _SetOobProgram(oob_path_exists)
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # With mode 0500 verification is expected to pass
    AssertCommand(["chmod", "0500", oob_path_exists])
    AssertCommand(distribute_cmd)
    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])

  _SetOobProgram("")
197
198
def TestClusterEpo():
  """gnt-cluster epo

  Checks the argument validation of C{gnt-cluster epo}, then powers
  everything off and on again, asserting the reported instance status after
  each step.

  """
  master = qa_config.GetMasterNode()
  instance_status_cmd = "gnt-instance list --no-header -o status"

  def _AssertEveryLineIs(cmd, expected):
    """Runs a command on the master and compares each output line.

    """
    output = GetCommandOutput(master["primary"], cmd)
    matches = compat.all(line == expected for line in output.splitlines())
    AssertEqual(matches, True)

  # Assert that OOB is unavailable for all nodes
  _AssertEveryLineIs("gnt-node list --verbose --no-header -o powered",
                     "(unavail)")

  epo_cmd = ["gnt-cluster", "epo"]

  # Conflicting
  AssertCommand(epo_cmd + ["--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(epo_cmd + ["--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(epo_cmd + ["-f", master["primary"]], fail=True)

  # This shouldn't fail
  AssertCommand(epo_cmd + ["-f", "--all"])

  # All instances should have been stopped now
  _AssertEveryLineIs(instance_status_cmd, "ADMIN_down")

  # Now start everything again
  AssertCommand(epo_cmd + ["--on", "-f", "--all"])

  # All instances should have been started now
  _AssertEveryLineIs(instance_status_cmd, "running")
235
236
def TestClusterVerify():
  """gnt-cluster verify

  Runs the cluster verification followed by C{gnt-cluster verify-disks};
  both commands must succeed.

  """
  for cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(cmd)
241
242
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command once and requires it to succeed.

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
246
247
def TestDelay(node):
  """gnt-debug delay

  Runs a one-second delay three times: with default options, with
  C{--no-master}, and with C{--no-master} plus the given node.

  @param node: node definition; its C{primary} entry is passed to C{-n}

  """
  delay_cmd = ["gnt-debug", "delay"]
  no_master_cmd = delay_cmd + ["--no-master"]

  AssertCommand(delay_cmd + ["1"])
  AssertCommand(no_master_cmd + ["1"])
  AssertCommand(no_master_cmd + ["-n", node["primary"], "1"])
254
255
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates a logical volume named C{qa-test} in C{xenvg} and checks that
  cluster verification fails unless the volume matches the configured
  reserved-lvs patterns. The volume is removed at the end.

  """
  def _SetReservedLvs(patterns):
    """Builds the command setting the reserved-lvs patterns.

    """
    return ["gnt-cluster", "modify", "--reserved-lvs", patterns]

  # Sequence of (command, whether it is expected to fail)
  steps = [
    (_CLUSTER_VERIFY, False),
    (_SetReservedLvs(""), False),
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    (_CLUSTER_VERIFY, True),
    (_SetReservedLvs("xenvg/qa-test,.*/other-test"), False),
    (_CLUSTER_VERIFY, False),
    (_SetReservedLvs(".*/qa-.*"), False),
    (_CLUSTER_VERIFY, False),
    (_SetReservedLvs(""), False),
    (_CLUSTER_VERIFY, True),
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (_CLUSTER_VERIFY, False),
    ]

  for cmd, must_fail in steps:
    AssertCommand(cmd, fail=must_fail)
274
275
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Changes several backend parameters, checking with C{gnt-cluster info} that
  valid values are applied and that invalid values are rejected without
  changing the current setting. Afterwards the backend parameters from the
  QA configuration, if any, are applied again.

  """
  def _Modify(beparams):
    """Builds the command changing the given backend parameters.

    """
    return ["gnt-cluster", "modify", "-B", beparams]

  def _InfoShows(name, value):
    """Builds the command checking a value in the cluster info output.

    """
    return ["sh", "-c",
            "gnt-cluster info|grep '^ *%s: %s$'" % (name, value)]

  # Sequence of (whether the command is expected to fail, command)
  steps = [
    # max/min mem
    (False, _Modify("maxmem=256")),
    (False, _InfoShows("maxmem", "256")),
    (False, _Modify("minmem=256")),
    (False, _InfoShows("minmem", "256")),
    (True, _Modify("maxmem=a")),
    (False, _InfoShows("maxmem", "256")),
    (True, _Modify("minmem=a")),
    (False, _InfoShows("minmem", "256")),
    (False, _Modify("maxmem=128,minmem=128")),
    (False, _InfoShows("maxmem", "128")),
    (False, _InfoShows("minmem", "128")),
    # vcpus
    (False, _Modify("vcpus=4")),
    (False, _InfoShows("vcpus", "4")),
    (True, _Modify("vcpus=a")),
    (False, _Modify("vcpus=1")),
    (False, _InfoShows("vcpus", "1")),
    # auto_balance
    (False, _Modify("auto_balance=False")),
    (False, _InfoShows("auto_balance", "False")),
    (True, _Modify("auto_balance=1")),
    (False, _Modify("auto_balance=True")),
    (False, _InfoShows("auto_balance", "True")),
    ]

  for must_fail, cmd in steps:
    AssertCommand(cmd, fail=must_fail)

  # redo the original-requested BE parameters, if any
  configured_bep = qa_config.get("backend-parameters", "")
  if configured_bep:
    AssertCommand(_Modify(configured_bep))
310
311
def TestClusterInfo():
  """gnt-cluster info

  Runs the command once and requires it to succeed.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
315
316
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command once and requires it to succeed.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
320
321
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command once and requires it to succeed.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
325
326
def TestClusterVersion():
  """gnt-cluster version

  Runs the command once and requires it to succeed.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
330
331
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then renews the cryptographic material with a custom RAPI
  certificate, a custom cluster domain secret and finally with all "new"
  options. The RAPI certificate backed up at the start is restored at the
  end and the backup file is removed in any case.

  """
  master = qa_config.GetMasterNode()
  renew_cmd = ["gnt-cluster", "renew-crypto", "--force"]

  # Conflicting options
  cmd = renew_cmd + ["--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for opts in conflicting:
    AssertCommand(cmd + opts, fail=True)

  # Invalid RAPI certificate
  AssertCommand(renew_cmd + ["--rapi-certificate=/dev/null"], fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         constants.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
    finally:
      # The local copy is no longer needed once it has been uploaded
      fh.close()

    try:
      AssertCommand(renew_cmd + ["--rapi-certificate=%s" % tmpcert])
    finally:
      AssertCommand(["rm", "-f", tmpcert])

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
    finally:
      # The local copy is no longer needed once it has been uploaded
      cds_fh.close()

    try:
      AssertCommand(renew_cmd + ["--cluster-domain-secret=%s" % tmpcds])
    finally:
      AssertCommand(["rm", "-f", tmpcds])

    # Normal case
    AssertCommand(renew_cmd +
                  ["--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(renew_cmd + ["--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
392
393
def TestClusterBurnin():
  """Burnin

  Uploads the burnin tool to the master node and runs it against the
  instances acquired from the QA configuration, using the burnin options
  found there. The uploaded script is removed and the instances are
  released afterwards, also on failure.

  @raise qa_error.Error: if not even one instance could be acquired

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  instances = []
  try:
    try:
      wanted = options.get("burnin-instances", 1)
      for _ in range(wanted):
        instances.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      # Run with however many instances were acquired so far
      print("Not enough instances, continuing anyway.")

    if not instances:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      cmd = [script]
      cmd.append("--os=%s" % qa_config.get("os"))
      cmd.append("--disk-size=%s" % ",".join(qa_config.get("disk")))
      cmd.append("--disk-growth=%s" % ",".join(qa_config.get("disk-growth")))
      cmd.append("--disk-template=%s" % disk_template)

      if parallel:
        cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        cmd.append("--http-check")
      if do_rename:
        cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        cmd.append("--no-reboot")

      cmd.extend([inst["name"] for inst in instances])
      AssertCommand(cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for inst in instances:
      qa_config.ReleaseInstance(inst)
444     for inst in instances:
445       qa_config.ReleaseInstance(inst)
446
447
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs a master failover on a node other than the current master and then
  runs it again on the original master. The acquired node is released
  afterwards, also on failure.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  try:
    # Fail over to the other node first, then back to original master node
    for target in (failovermaster, master):
      AssertCommand(["gnt-cluster", "master-failover"], node=target)
  finally:
    qa_config.ReleaseNode(failovermaster)
460
461
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on the failover candidate, runs the
  master failover there and asserts that the drain file is gone afterwards,
  both when checked without an explicit node and on the candidate. The
  master role is then moved back to the original master. All commands run
  while the candidate node is acquired, and the node is released even if one
  of them fails.

  """
  drain_check = ["test", "-f", constants.JOB_QUEUE_DRAIN_FILE]
  cmd = ["gnt-cluster", "master-failover"]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  try:
    # Ensure queue is not drained
    for node in [master, failovermaster]:
      AssertCommand(drain_check, node=node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", constants.JOB_QUEUE_DRAIN_FILE],
                  node=failovermaster)

    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)
    # The drain file must no longer exist after the failover
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)

    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)
  finally:
    qa_config.ReleaseNode(failovermaster)
490
491
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file containing a fresh UUID to the master node, distributes it
  with C{gnt-cluster copyfile} and checks its content on all nodes. The file
  is removed from all nodes afterwards, also on failure.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file; it is only needed locally until it is uploaded
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    # The upload is given the file's path, so the data must be flushed first
    f.flush()

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
  finally:
    f.close()

  try:
    # Copy file to all nodes
    AssertCommand(["gnt-cluster", "copyfile", testname])
    _CheckFileOnAllNodes(testname, uniqueid)
  finally:
    _RemoveFileFromAllNodes(testname)
512
513
def TestClusterCommand():
  """gnt-cluster command

  Uses C{gnt-cluster command} to write a fresh UUID into a file under
  C{/tmp} and checks that the file has this content on all nodes. The file
  is removed from all nodes afterwards, also on failure.

  """
  uniqueid = utils.NewUUID()
  rfile = "/tmp/gnt%s" % utils.NewUUID()

  # Command line passed to "gnt-cluster command", redirecting into the file
  echo_cmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  remote_cmd = "%s >%s" % (echo_cmd, rfile)
  cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_cmd])

  try:
    AssertCommand(cmd)
    _CheckFileOnAllNodes(rfile, uniqueid)
  finally:
    _RemoveFileFromAllNodes(rfile)
527
528
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with C{--yes-do-it} and requires it to succeed.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
532
533
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command once and requires it to succeed.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)