configure: Add option to enable remote commands
[ganeti-local] / qa / qa_cluster.py
1 #
2 #
3
4 # Copyright (C) 2007, 2010, 2011, 2012 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Cluster related QA tests.
23
24 """
25
26 import tempfile
27 import os.path
28
29 from ganeti import constants
30 from ganeti import compat
31 from ganeti import utils
32 from ganeti import pathutils
33
34 import qa_config
35 import qa_utils
36 import qa_error
37
38 from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
39
40
41 #: cluster verify command
42 _CLUSTER_VERIFY = ["gnt-cluster", "verify"]
43
44
def _RemoveFileFromAllNodes(filename):
  """Removes a file from all nodes.

  Runs C{rm -f} for the given path on every entry of the "nodes" list in the
  QA configuration.

  @param filename: path of the file to delete

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
51
52
def _CheckFileOnAllNodes(filename, content):
  """Verifies the content of the given file on all nodes.

  The file is read with C{cat} on the primary address of every entry of the
  "nodes" list in the QA configuration.

  @param filename: path of the file to read
  @param content: output that every node is expected to return

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    actual = GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(actual, content)
60
61
62 # data for testing failures due to bad keys/values for disk parameters
63 _FAIL_PARAMS = ["nonexistent:resync-rate=1",
64                 "drbd:nonexistent=1",
65                 "drbd:resync-rate=invalid",
66                 ]
67
68
def TestClusterInitDisk():
  """gnt-cluster init -D

  Checks that cluster initialization is refused for every bad disk parameter
  specification in L{_FAIL_PARAMS}.

  """
  cluster_name = qa_config.get("name")
  init_cmd = ["gnt-cluster", "init", "-D"]
  for bad_param in _FAIL_PARAMS:
    AssertCommand(init_cmd + [bad_param, cluster_name], fail=True)
74
75
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs a RAPI users file containing the given credentials, initializes
  the cluster from the QA configuration and afterwards applies the
  configured hypervisor, backend, OS and per-OS hypervisor parameters.

  @param rapi_user: user name written to the RAPI users file
  @param rapi_secret: password written to the RAPI users file

  """
  master = qa_config.GetMasterNode()

  # First create the RAPI credentials
  rapi_users_dir = os.path.dirname(pathutils.RAPI_USERS_FILE)
  credentials = tempfile.NamedTemporaryFile()
  try:
    credentials.write("%s %s write\n" % (rapi_user, rapi_secret))
    credentials.flush()

    uploaded = qa_utils.UploadFile(master["primary"], credentials.name)
    try:
      AssertCommand(["mkdir", "-p", rapi_users_dir])
      AssertCommand(["mv", uploaded, pathutils.RAPI_USERS_FILE])
    finally:
      # Harmless after a successful move, cleans up the upload otherwise
      AssertCommand(["rm", "-f", uploaded])
  finally:
    credentials.close()

  # Initialize cluster
  ip_version = qa_config.get("primary_ip_version", 4)
  hypervisors = ",".join(qa_config.GetEnabledHypervisors())
  init_cmd = [
    "gnt-cluster", "init",
    "--primary-ip-version=%d" % ip_version,
    "--enabled-hypervisors=%s" % hypervisors,
    ]

  # Instance specs are only passed for values present in the configuration
  spec_types = ["mem-size", "disk-size", "disk-count", "cpu-count",
                "nic-count"]
  for spec_type in spec_types:
    config_prefix = "ispec_%s" % spec_type.replace("-", "_")
    for bound in ("min", "max", "std"):
      value = qa_config.get("%s_%s" % (config_prefix, bound), None)
      if value:
        init_cmd.append("--specs-%s=%s=%d" % (spec_type, bound, value))

  secondary_ip = master.get("secondary", None)
  if secondary_ip:
    init_cmd.append("--secondary-ip=%s" % secondary_ip)

  bridge = qa_config.get("bridge", None)
  if bridge:
    # The bridge doubles as the master network device
    init_cmd.extend(["--bridge=%s" % bridge, "--master-netdev=%s" % bridge])

  init_cmd.append(qa_config.get("name"))
  AssertCommand(init_cmd)

  # Hypervisor and backend parameter modifications share one invocation
  modify_cmd = ["gnt-cluster", "modify"]
  base_len = len(modify_cmd)

  hv_config = qa_config.get("hypervisor-parameters", {})
  for hv_name, hv_params in hv_config.items():
    modify_cmd.extend(["-H", "%s:%s" % (hv_name, hv_params)])

  backend_params = qa_config.get("backend-parameters", "")
  if backend_params:
    modify_cmd.extend(["-B", backend_params])

  # Only run the command if at least one option was added
  if len(modify_cmd) > base_len:
    AssertCommand(modify_cmd)

  # OS parameters
  os_config = qa_config.get("os-parameters", {})
  for os_name, os_params in os_config.items():
    AssertCommand(["gnt-os", "modify", "-O", os_params, os_name])

  # OS hypervisor parameters
  os_hv_config = qa_config.get("os-hvp", {})
  for os_name, per_hypervisor in os_hv_config.items():
    for hv_name, hv_params in per_hypervisor.items():
      AssertCommand(["gnt-os", "modify", "-H",
                     "%s:%s" % (hv_name, hv_params), os_name])
147
148
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the "rename" entry of the QA configuration and back
  to its original name, running a cluster verification after each rename.
  Prints an error and returns early if no "rename" entry is configured.

  """
  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  rename_cmd = ["gnt-cluster", "rename", "-f"]
  for new_name in (rename_target, original_name):
    AssertCommand(rename_cmd + [new_name])
    AssertCommand(_CLUSTER_VERIFY)
166
167
def TestClusterOob():
  """out-of-band framework

  Points the "oob_program" node parameter at a missing file, then at an
  existing file with mode 0400 and finally at the same file with mode 0500,
  asserting that cluster verification only succeeds in the last case. The
  parameter is reset to an empty value at the end.

  """
  def _SetOobProgram(path):
    """Sets the oob_program node parameter to the given path.

    """
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program=%s" % path])

  existing_oob = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()
  distribute_cmd = ["gnt-cluster", "copyfile", existing_oob]

  AssertCommand(_CLUSTER_VERIFY)

  # A program that does not exist must make verification fail
  _SetOobProgram("/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID())
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  AssertCommand(["touch", existing_oob])
  AssertCommand(["chmod", "0400", existing_oob])
  AssertCommand(distribute_cmd)

  try:
    _SetOobProgram(existing_oob)

    # With mode 0400 verification is still expected to fail
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    AssertCommand(["chmod", "0500", existing_oob])
    AssertCommand(distribute_cmd)

    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", existing_oob])

  _SetOobProgram("")
198
199
def TestClusterEpo():
  """gnt-cluster epo

  Checks the rejected argument combinations of the command, then runs it for
  all nodes and back on again, comparing the instance status list after each
  step.

  """
  master = qa_config.GetMasterNode()

  def _AssertEveryLine(list_cmd, expected):
    """Asserts that every output line of a list command equals a value.

    """
    lines = GetCommandOutput(master["primary"], list_cmd).splitlines()
    AssertEqual(compat.all(value == expected for value in lines), True)

  instance_status_cmd = "gnt-instance list --no-headers -o status"
  epo_cmd = ["gnt-cluster", "epo"]

  # Assert that OOB is unavailable for all nodes
  _AssertEveryLine("gnt-node list --verbose --no-headers -o powered",
                   "(unavail)")

  # Conflicting
  AssertCommand(epo_cmd + ["--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(epo_cmd + ["--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(epo_cmd + ["-f", master["primary"]], fail=True)

  # This shouldn't fail
  AssertCommand(epo_cmd + ["-f", "--all"])

  # All instances should have been stopped now; ERROR_down because the
  # instance is stopped but not recorded as such
  _AssertEveryLine(instance_status_cmd, "ERROR_down")

  # Now start everything again
  AssertCommand(epo_cmd + ["--on", "-f", "--all"])

  # All instances should have been started now
  _AssertEveryLine(instance_status_cmd, "running")
237
238
def TestClusterVerify():
  """gnt-cluster verify

  Runs the cluster verification followed by the disk verification.

  """
  for cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(cmd)
243
244
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command through L{AssertCommand} without expecting a failure.

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
248
249
def TestDelay(node):
  """gnt-debug delay

  Runs a delay of "1" three times: plain, with C{--no-master}, and with
  C{--no-master} plus C{-n} for the given node.

  @param node: node dictionary; its "primary" entry is passed to C{-n}

  """
  delay_cmd = ["gnt-debug", "delay"]
  AssertCommand(delay_cmd + ["1"])

  no_master_cmd = delay_cmd + ["--no-master"]
  AssertCommand(no_master_cmd + ["1"])
  AssertCommand(no_master_cmd + ["-n", node["primary"], "1"])
256
257
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates the logical volume "xenvg/qa-test" and checks that cluster
  verification fails while the reserved LV list is empty and succeeds while
  the configured patterns include the volume. The volume is removed at the
  end.

  """
  verify_passes = (False, _CLUSTER_VERIFY)
  verify_fails = (True, _CLUSTER_VERIFY)

  def _SetReserved(patterns):
    """Builds the step that sets the reserved LV patterns.

    """
    return (False, ["gnt-cluster", "modify", "--reserved-lvs", patterns])

  # Each step is (expected to fail, command)
  steps = [
    verify_passes,
    _SetReserved(""),
    (False, ["lvcreate", "-L1G", "-nqa-test", "xenvg"]),
    verify_fails,
    _SetReserved("xenvg/qa-test,.*/other-test"),
    verify_passes,
    _SetReserved(".*/qa-.*"),
    verify_passes,
    _SetReserved(""),
    verify_fails,
    (False, ["lvremove", "-f", "xenvg/qa-test"]),
    verify_passes,
    ]

  for must_fail, step_cmd in steps:
    AssertCommand(step_cmd, fail=must_fail)
276
277
def TestClusterModifyEmpty():
  """gnt-cluster modify

  Expects the command to fail when called without any option.

  """
  bare_modify_cmd = ["gnt-cluster", "modify"]
  AssertCommand(bare_modify_cmd, fail=True)
281
282
def TestClusterModifyDisk():
  """gnt-cluster modify -D

  Checks that every bad disk parameter specification in L{_FAIL_PARAMS} is
  refused.

  """
  modify_cmd = ["gnt-cluster", "modify", "-D"]
  for bad_param in _FAIL_PARAMS:
    AssertCommand(modify_cmd + [bad_param], fail=True)
287
288
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Sets valid and invalid backend parameter values and greps the output of
  C{gnt-cluster info} for the value expected after each change. Finally the
  "backend-parameters" entry of the QA configuration, if any, is re-applied.

  """
  def _Set(beparams, fail=False):
    """Runs the modification with the given backend parameters.

    """
    AssertCommand(["gnt-cluster", "modify", "-B", beparams], fail=fail)

  def _ExpectInfo(info_line):
    """Asserts that the cluster info output contains the given line.

    """
    AssertCommand(["sh", "-c",
                   "gnt-cluster info|grep '^ *%s$'" % info_line],
                  fail=False)

  # max/min mem
  _Set("maxmem=256")
  _ExpectInfo("maxmem: 256")
  _Set("minmem=256")
  _ExpectInfo("minmem: 256")
  # rejected values must leave the previous setting in place
  _Set("maxmem=a", fail=True)
  _ExpectInfo("maxmem: 256")
  _Set("minmem=a", fail=True)
  _ExpectInfo("minmem: 256")
  _Set("maxmem=128,minmem=128")
  _ExpectInfo("maxmem: 128")
  _ExpectInfo("minmem: 128")

  # vcpus
  _Set("vcpus=4")
  _ExpectInfo("vcpus: 4")
  _Set("vcpus=a", fail=True)
  _Set("vcpus=1")
  _ExpectInfo("vcpus: 1")

  # auto_balance
  _Set("auto_balance=False")
  _ExpectInfo("auto_balance: False")
  _Set("auto_balance=1", fail=True)
  _Set("auto_balance=True")
  _ExpectInfo("auto_balance: True")

  # redo the original-requested BE parameters, if any
  configured = qa_config.get("backend-parameters", "")
  if configured:
    AssertCommand(["gnt-cluster", "modify", "-B", configured])
323
324
def TestClusterInfo():
  """gnt-cluster info

  Runs the command through L{AssertCommand} without expecting a failure.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
328
329
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command through L{AssertCommand} without expecting a failure.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
333
334
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command through L{AssertCommand} without expecting a failure.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
338
339
def TestClusterVersion():
  """gnt-cluster version

  Runs the command through L{AssertCommand} without expecting a failure.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
343
344
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then renews the crypto material with a custom RAPI certificate,
  with a custom cluster domain secret and with freshly generated values.
  The RAPI certificate backed up at the start is put back at the end and the
  backup file is removed.

  Local temporary files are closed as soon as they have been uploaded, so
  they do not linger until garbage collection.

  """
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         pathutils.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
    finally:
      # The local copy is not needed once the upload was attempted
      fh.close()

    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--rapi-certificate=%s" % tmpcert])
    finally:
      AssertCommand(["rm", "-f", tmpcert])

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
    finally:
      cds_fh.close()

    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--cluster-domain-secret=%s" % tmpcds])
    finally:
      AssertCommand(["rm", "-f", tmpcds])

    # Normal case
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
405
406
def TestClusterBurnin():
  """Burnin

  Acquires up to the configured number of instances, uploads the burnin
  tool to the master node and runs it with options taken from the QA
  configuration. The uploaded script is removed and all acquired instances
  are released afterwards.

  @raise qa_error.Error: if not even one instance could be acquired

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  instances = []
  try:
    # Get as many instances as we need; running short is tolerated
    try:
      for _ in range(options.get("burnin-instances", 1)):
        instances.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if not instances:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      burnin_cmd = [
        script,
        "--os=%s" % qa_config.get("os"),
        "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
        "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
        "--disk-size=%s" % ",".join(qa_config.get("disk")),
        "--disk-growth=%s" % ",".join(qa_config.get("disk-growth")),
        "--disk-template=%s" % disk_template,
        ]
      if parallel:
        burnin_cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        burnin_cmd.append("--http-check")
      if do_rename:
        burnin_cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        burnin_cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        burnin_cmd.append("--no-reboot")
      burnin_cmd.extend([inst["name"] for inst in instances])
      AssertCommand(burnin_cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for inst in instances:
      qa_config.ReleaseInstance(inst)
461
462
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs the failover on a second node and then again on the original master
  node. The second node is released afterwards.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  failover_cmd = ["gnt-cluster", "master-failover"]
  try:
    # First to the other node, then back to original master node
    for target in (failovermaster, master):
      AssertCommand(failover_cmd, node=target)
  finally:
    qa_config.ReleaseNode(failovermaster)
475
476
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on a second node, runs the failover on
  that node and asserts that the drain file is gone afterwards, both before
  and after failing back to the original master node.

  Every step after acquiring the second node runs inside the C{try} block,
  so the node is released even when one of the initial checks fails, and it
  is no longer used after having been released.

  """
  drain_check = ["test", "-f", pathutils.JOB_QUEUE_DRAIN_FILE]
  cmd = ["gnt-cluster", "master-failover"]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)
  try:
    # Ensure queue is not drained
    for node in [master, failovermaster]:
      AssertCommand(drain_check, node=node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", pathutils.JOB_QUEUE_DRAIN_FILE],
                  node=failovermaster)

    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)
    # The drain file must not exist any longer after the failover
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)

    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)
  finally:
    qa_config.ReleaseNode(failovermaster)
505
506
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file containing a fresh UUID to the master node, distributes it
  with the command under test and checks its content on all nodes. The file
  is removed from all nodes afterwards.

  The local temporary file is closed as soon as it has been uploaded, so it
  does not linger until garbage collection.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    f.flush()
    f.seek(0)

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
  finally:
    f.close()

  try:
    # Copy file to all nodes
    AssertCommand(["gnt-cluster", "copyfile", testname])
    _CheckFileOnAllNodes(testname, uniqueid)
  finally:
    _RemoveFileFromAllNodes(testname)
527
528
def TestClusterCommand():
  """gnt-cluster command

  Uses the command under test to write a fresh UUID into a file below /tmp
  and checks the file's content on all nodes. The file is removed from all
  nodes afterwards.

  """
  uniqueid = utils.NewUUID()
  rfile = "/tmp/gnt%s" % utils.NewUUID()

  # The redirection is part of the remote command, hence quoted as a whole
  echo_cmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  remote_cmd = "%s >%s" % (echo_cmd, rfile)
  cluster_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_cmd])

  try:
    AssertCommand(cluster_cmd)
    _CheckFileOnAllNodes(rfile, uniqueid)
  finally:
    _RemoveFileFromAllNodes(rfile)
542
543
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with C{--yes-do-it} through L{AssertCommand} without
  expecting a failure.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
547
548
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command through L{AssertCommand} without expecting a failure.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)