Fix qualified import of Data.Map in QC.hs
[ganeti-local] / qa / ganeti-qa.py
1 #!/usr/bin/python -u
2 #
3
4 # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Script for doing QA on Ganeti.
23
24 """
25
26 # pylint: disable=C0103
27 # due to invalid name
28
29 import sys
30 import datetime
31 import optparse
32
33 import qa_cluster
34 import qa_config
35 import qa_daemon
36 import qa_env
37 import qa_error
38 import qa_group
39 import qa_instance
40 import qa_node
41 import qa_os
42 import qa_job
43 import qa_rapi
44 import qa_tags
45 import qa_utils
46
47 from ganeti import utils
48 from ganeti import rapi
49 from ganeti import constants
50
51 import ganeti.rapi.client # pylint: disable=W0611
52
53
54 def _FormatHeader(line, end=72):
55   """Fill a line up to the end column.
56
57   """
58   line = "---- " + line + " "
59   line += "-" * (end - len(line))
60   line = line.rstrip()
61   return line
62
63
64 def _DescriptionOf(fn):
65   """Computes the description of an item.
66
67   """
68   if fn.__doc__:
69     desc = fn.__doc__.splitlines()[0].strip()
70   else:
71     desc = "%r" % fn
72
73   return desc.rstrip(".")
74
75
def RunTest(fn, *args, **kwargs):
  """Calls a test function and frames its output with header lines.

  An empty line and a header with the start time are printed before
  C{fn} is called. A header with the end time and the elapsed time is
  printed afterwards, also when C{fn} raises an exception.

  @param fn: test function to call
  @param args: positional arguments passed to C{fn}
  @param kwargs: keyword arguments passed to C{fn}
  @return: the return value of C{fn}

  """
  started = datetime.datetime.now()
  title = _DescriptionOf(fn)

  print("")
  print(_FormatHeader("%s start %s" % (started, title)))

  try:
    return fn(*args, **kwargs)
  finally:
    # Printed for failed tests as well, hence the "finally" clause
    finished = datetime.datetime.now()
    elapsed = finished - started
    print(_FormatHeader("%s time=%s %s" % (finished, elapsed, title)))
95
96
def RunTestIf(testnames, fn, *args, **kwargs):
  """Runs a test only if it is enabled in the QA configuration.

  For a disabled test a header line mentioning the skipped test is
  printed instead. The return value of the test is not passed on.

  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)
  @param fn: test function, run through L{RunTest}
  @param args: positional arguments passed to C{fn}
  @param kwargs: keyword arguments passed to C{fn}

  """
  if not qa_config.TestEnabled(testnames):
    when = datetime.datetime.now()
    what = _DescriptionOf(fn)
    print(_FormatHeader("%s skipping %s, test(s) %s disabled" %
                        (when, what, testnames)))
    return

  RunTest(fn, *args, **kwargs)
111
112
def RunEnvTests():
  """Runs the environment tests.

  All of them are controlled by the "env" test name.

  """
  for check in [
    qa_env.TestSshConnection,
    qa_env.TestIcmpPing,
    qa_env.TestGanetiCommands,
    ]:
    RunTestIf("env", check)
120
121
def SetupCluster(rapi_user, rapi_secret):
  """Initializes the cluster and runs the first listing tests.

  @param rapi_user: Login user for RAPI
  @param rapi_secret: Login secret for RAPI

  """
  RunTestIf("create-cluster", qa_cluster.TestClusterInit,
            rapi_user, rapi_secret)

  # Test on empty cluster
  for (test, fn) in [
    ("node-list", qa_node.TestNodeList),
    ("instance-list", qa_instance.TestInstanceList),
    ("job-list", qa_job.TestJobList),
    ]:
    RunTestIf(test, fn)

  RunTestIf("create-cluster", qa_node.TestNodeAddAll)
  if not qa_config.TestEnabled("create-cluster"):
    # consider the nodes are already there
    qa_node.MarkNodeAddedAll()

  RunTestIf("test-jobqueue", qa_cluster.TestJobqueue)

  # enable the watcher (unconditionally)
  RunTest(qa_daemon.TestResumeWatcher)

  for (test, fn) in [
    ("node-list", qa_node.TestNodeList),
    # Test listing fields
    ("node-list", qa_node.TestNodeListFields),
    ("instance-list", qa_instance.TestInstanceListFields),
    ("job-list", qa_job.TestJobListFields),
    ("instance-export", qa_instance.TestBackupListFields),
    ("node-info", qa_node.TestNodeInfo),
    ]:
    RunTestIf(test, fn)
156
157
def RunClusterTests():
  """Runs the gnt-cluster tests and the first RAPI tests.

  The tests are grouped by the test name enabling them and run in the
  order in which they are listed.

  """
  plan = [
    ("create-cluster", [qa_cluster.TestClusterInitDisk]),
    ("cluster-renew-crypto", [qa_cluster.TestClusterRenewCrypto]),
    ("cluster-verify", [qa_cluster.TestClusterVerify]),
    ("cluster-reserved-lvs", [qa_cluster.TestClusterReservedLvs]),
    # TODO: add more cluster modify tests
    ("cluster-modify", [
      qa_cluster.TestClusterModifyEmpty,
      qa_cluster.TestClusterModifyBe,
      qa_cluster.TestClusterModifyDisk,
      ]),
    ("cluster-rename", [qa_cluster.TestClusterRename]),
    ("cluster-info", [
      qa_cluster.TestClusterVersion,
      qa_cluster.TestClusterInfo,
      qa_cluster.TestClusterGetmaster,
      ]),
    ("cluster-redist-conf", [qa_cluster.TestClusterRedistConf]),
    ("cluster-copyfile", [qa_cluster.TestClusterCopyfile]),
    ("cluster-command", [qa_cluster.TestClusterCommand]),
    ("cluster-burnin", [qa_cluster.TestClusterBurnin]),
    ("cluster-master-failover", [
      qa_cluster.TestClusterMasterFailover,
      qa_cluster.TestClusterMasterFailoverWithDrainedQueue,
      ]),
    ("cluster-oob", [qa_cluster.TestClusterOob]),
    ("rapi", [
      qa_rapi.TestVersion,
      qa_rapi.TestEmptyCluster,
      qa_rapi.TestRapiQuery,
      ]),
    ]

  for (test, functions) in plan:
    for fn in functions:
      RunTestIf(test, fn)
188
189
def RunRepairDiskSizes():
  """Runs the repair disk-sizes test, if enabled.

  """
  test_fn = qa_cluster.TestClusterRepairDiskSizes
  RunTestIf("cluster-repair-disk-sizes", test_fn)
195
196
def RunOsTests():
  """Runs all tests related to gnt-os.

  Some of the tests are given the RAPI function for retrieving operating
  systems, or C{None} if the "rapi" test is not enabled.

  """
  rapi_getos = None
  if qa_config.TestEnabled("rapi"):
    rapi_getos = qa_rapi.GetOperatingSystems

  # Pairs of test function and the extra arguments it is called with
  plan = [
    (qa_os.TestOsList, ()),
    (qa_os.TestOsDiagnose, ()),
    (qa_os.TestOsValid, (rapi_getos, )),
    (qa_os.TestOsInvalid, (rapi_getos, )),
    (qa_os.TestOsPartiallyValid, (rapi_getos, )),
    (qa_os.TestOsModifyValid, ()),
    (qa_os.TestOsModifyInvalid, ()),
    (qa_os.TestOsStatesNonExisting, ()),
    ]

  for (fn, fn_args) in plan:
    RunTestIf("os", fn, *fn_args)
225
226
def RunCommonInstanceTests(instance):
  """Runs the instance tests that are common to all disk types.

  @param instance: instance to run the tests on; its C{"name"} entry is
      used by the rename tests

  """
  RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance)
  RunTestIf(["instance-shutdown", "instance-console", "rapi"],
            qa_rapi.TestRapiStoppedInstanceConsole, instance)
  RunTestIf(["instance-shutdown", "instance-modify"],
            qa_instance.TestInstanceStoppedModify, instance)
  RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)

  # Test shutdown/start via RAPI
  for rapi_fn in [
    qa_rapi.TestRapiInstanceShutdown,
    qa_rapi.TestRapiInstanceStartup,
    ]:
    RunTestIf(["instance-shutdown", "rapi"], rapi_fn, instance)

  RunTestIf("instance-list", qa_instance.TestInstanceList)

  for (tests, fn) in [
    ("instance-info", qa_instance.TestInstanceInfo),
    ("instance-modify", qa_instance.TestInstanceModify),
    (["instance-modify", "rapi"], qa_rapi.TestRapiInstanceModify),
    ("instance-console", qa_instance.TestInstanceConsole),
    (["instance-console", "rapi"], qa_rapi.TestRapiInstanceConsole),
    ]:
    RunTestIf(tests, fn, instance)

  needs_shutdown = qa_config.Either([
    "instance-reinstall",
    "instance-rename",
    "instance-grow-disk",
    ])

  # shutdown instance for any 'down' tests
  RunTestIf(needs_shutdown, qa_instance.TestInstanceShutdown, instance)

  # now run the 'down' state tests
  RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
  RunTestIf(["instance-reinstall", "rapi"],
            qa_rapi.TestRapiInstanceReinstall, instance)

  if qa_config.TestEnabled("instance-rename"):
    current_name = instance["name"]
    configured_name = qa_config.get("rename", None)

    # The instance is always renamed to the same name; renaming to a
    # different name is only done if we have one configured
    new_names = [current_name]
    if configured_name is not None:
      new_names.append(configured_name)

    for new_name in new_names:
      RunTest(qa_instance.TestInstanceRenameAndBack,
              current_name, new_name)
      RunTestIf("rapi", qa_rapi.TestRapiInstanceRenameAndBack,
                current_name, new_name)

  RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)

  # and now start the instance again
  RunTestIf(needs_shutdown, qa_instance.TestInstanceStartup, instance)

  RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)
  RunTestIf("tags", qa_tags.TestInstanceTags, instance)
  RunTestIf("cluster-verify", qa_cluster.TestClusterVerify)
  RunTestIf("rapi", qa_rapi.TestInstance, instance)

  for (test, fn) in [
    # Lists instances, too
    ("node-list", qa_node.TestNodeList),
    # Some jobs have been run, let's test listing them
    ("job-list", qa_job.TestJobList),
    ]:
    RunTestIf(test, fn)
303
304
def RunCommonNodeTests():
  """Runs the node volume, storage and out-of-band tests.

  """
  for (test, fn) in [
    ("node-volumes", qa_node.TestNodeVolumes),
    ("node-storage", qa_node.TestNodeStorage),
    ("node-oob", qa_node.TestOutOfBand),
    ]:
    RunTestIf(test, fn)
312
313
def RunGroupListTests():
  """Runs the tests for listing node groups.

  Both tests are controlled by the "group-list" test name.

  """
  for fn in [qa_group.TestGroupList, qa_group.TestGroupListFields]:
    RunTestIf("group-list", fn)
320
321
def RunGroupRwTests():
  """Runs the tests for adding, removing, renaming and modifying groups.

  """
  for fn in [
    qa_group.TestGroupAddRemoveRename,
    qa_group.TestGroupAddWithOptions,
    qa_group.TestGroupModify,
    ]:
    RunTestIf("group-rwops", fn)

  RunTestIf(["group-rwops", "rapi"], qa_rapi.TestRapiNodeGroups)

  # The default group is looked up only now, after the tests above ran
  default_group = qa_group.GetDefaultGroup()
  RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags, default_group)
332
333
def RunExportImportTests(instance, pnode, snode):
  """Tries to export and import the instance.

  If enabled, the inter-cluster instance move via RAPI is tested as well.
  Nodes and instances acquired here are released again before returning.

  @param instance: instance to export
  @param pnode: current primary node of the instance
  @param snode: current secondary node of the instance, if any,
      otherwise None

  """
  if qa_config.TestEnabled("instance-export"):
    RunTest(qa_instance.TestInstanceExportNoTarget, instance)

    export_node = qa_config.AcquireNode(exclude=pnode)
    try:
      export_name = RunTest(qa_instance.TestInstanceExport,
                            instance, export_node)

      RunTest(qa_instance.TestBackupList, export_node)

      if qa_config.TestEnabled("instance-import"):
        imported = qa_config.AcquireInstance()
        try:
          RunTest(qa_instance.TestInstanceImport, imported, pnode,
                  export_node, export_name)
          # Check if starting the instance works
          RunTest(qa_instance.TestInstanceStartup, imported)
          RunTest(qa_instance.TestInstanceRemove, imported)
        finally:
          qa_config.ReleaseInstance(imported)
    finally:
      qa_config.ReleaseNode(export_node)

  if qa_config.TestEnabled(["rapi", "inter-cluster-instance-move"]):
    moved = qa_config.AcquireInstance()
    try:
      # The target must be different from the nodes used by the instance
      used_nodes = [pnode]
      if snode is not None:
        used_nodes.append(snode)

      target_node = qa_config.AcquireNode(exclude=used_nodes)
      try:
        RunTest(qa_rapi.TestInterClusterInstanceMove, instance, moved,
                pnode, snode, target_node)
      finally:
        qa_config.ReleaseNode(target_node)
    finally:
      qa_config.ReleaseInstance(moved)
379
380
def RunDaemonTests(instance):
  """Test the ganeti-watcher script.

  The watcher is paused before the restart tests and resumed after them.

  @param instance: instance used by the restart tests

  """
  RunTest(qa_daemon.TestPauseWatcher)

  for (test, fn) in [
    ("instance-automatic-restart",
     qa_daemon.TestInstanceAutomaticRestart),
    ("instance-consecutive-failures",
     qa_daemon.TestInstanceConsecutiveFailures),
    ]:
    RunTestIf(test, fn, instance)

  RunTest(qa_daemon.TestResumeWatcher)
393
394
def RunSingleHomedHardwareFailureTests(instance, pnode):
  """Test hardware failure recovery for single-homed instances.

  Nothing is done unless the "instance-recreate-disks" test is enabled.

  @param instance: instance whose disks are recreated
  @param pnode: primary node of the instance

  """
  if not qa_config.TestEnabled("instance-recreate-disks"):
    return

  spare_node = qa_config.AcquireNode(exclude=[pnode])
  try:
    RunTest(qa_instance.TestRecreateDisks,
            instance, pnode, None, [spare_node])
  finally:
    qa_config.ReleaseNode(spare_node)
406
407
def RunHardwareFailureTests(instance, pnode, snode):
  """Test cluster internal hardware failure recovery.

  Runs the failover, migration, replace-disks, recreate-disks, node
  evacuation, node failover and disk failure tests, as far as they are
  enabled. Nodes acquired for these tests are released again, also when
  a test or a later node acquisition fails.

  @param instance: instance to run the tests on
  @param pnode: primary node of the instance
  @param snode: secondary node of the instance

  """
  RunTestIf("instance-failover", qa_instance.TestInstanceFailover, instance)
  RunTestIf(["instance-failover", "rapi"],
            qa_rapi.TestRapiInstanceFailover, instance)

  RunTestIf("instance-migrate", qa_instance.TestInstanceMigrate, instance)
  RunTestIf(["instance-migrate", "rapi"],
            qa_rapi.TestRapiInstanceMigrate, instance)

  if qa_config.TestEnabled("instance-replace-disks"):
    othernode = qa_config.AcquireNode(exclude=[pnode, snode])
    try:
      RunTestIf("rapi", qa_rapi.TestRapiInstanceReplaceDisks, instance)
      RunTest(qa_instance.TestReplaceDisks,
              instance, pnode, snode, othernode)
    finally:
      qa_config.ReleaseNode(othernode)

  if qa_config.TestEnabled("instance-recreate-disks"):
    othernode1 = qa_config.AcquireNode(exclude=[pnode, snode])
    # From here on the first node must be released whatever happens,
    # including unexpected errors while acquiring the second node
    try:
      try:
        othernode2 = qa_config.AcquireNode(exclude=[pnode, snode, othernode1])
      except qa_error.OutOfNodesError:
        # Let's reuse one of the nodes if the cluster is not big enough
        othernode2 = pnode
      try:
        RunTest(qa_instance.TestRecreateDisks,
                instance, pnode, snode, [othernode1, othernode2])
      finally:
        # The primary node was not acquired here, so it is not released
        if othernode2 != pnode:
          qa_config.ReleaseNode(othernode2)
    finally:
      qa_config.ReleaseNode(othernode1)

  RunTestIf("node-evacuate", qa_node.TestNodeEvacuate, pnode, snode)

  RunTestIf("node-failover", qa_node.TestNodeFailover, pnode, snode)

  RunTestIf("instance-disk-failure", qa_instance.TestInstanceMasterDiskFailure,
            instance, pnode, snode)
  RunTestIf("instance-disk-failure",
            qa_instance.TestInstanceSecondaryDiskFailure, instance,
            pnode, snode)
453
454
def RunQa():
  """Main QA body.

  Runs the environment tests and the cluster setup, then the cluster, OS,
  tag, node and group tests, then the instance tests for the enabled disk
  templates, and finally the node removal and cluster destruction tests.
  Nodes acquired from the QA configuration are released again, also when
  a test fails.

  """
  rapi_user = "ganeti-qa"
  rapi_secret = utils.GenerateSecret()

  RunEnvTests()
  SetupCluster(rapi_user, rapi_secret)

  # Load RAPI certificate
  qa_rapi.Setup(rapi_user, rapi_secret)

  RunClusterTests()
  RunOsTests()

  RunTestIf("tags", qa_tags.TestClusterTags)

  RunCommonNodeTests()
  RunGroupListTests()
  RunGroupRwTests()

  # These tests are run on a node other than the master
  pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
  try:
    RunTestIf("node-readd", qa_node.TestNodeReadd, pnode)
    RunTestIf("node-modify", qa_node.TestNodeModify, pnode)
    RunTestIf("delay", qa_cluster.TestDelay, pnode)
  finally:
    qa_config.ReleaseNode(pnode)

  pnode = qa_config.AcquireNode()
  try:
    RunTestIf("tags", qa_tags.TestNodeTags, pnode)

    if qa_rapi.Enabled():
      RunTest(qa_rapi.TestNode, pnode)

      if qa_config.TestEnabled("instance-add-plain-disk"):
        for use_client in [True, False]:
          rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
                                  use_client)
          if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
            RunCommonInstanceTests(rapi_instance)
          RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
          del rapi_instance

    if qa_config.TestEnabled("instance-add-plain-disk"):
      instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode)
      RunCommonInstanceTests(instance)
      RunGroupListTests()
      RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
      RunExportImportTests(instance, pnode, None)
      RunDaemonTests(instance)
      RunRepairDiskSizes()
      RunSingleHomedHardwareFailureTests(instance, pnode)
      RunTest(qa_instance.TestInstanceRemove, instance)
      del instance

    multinode_tests = [
      ("instance-add-drbd-disk",
       qa_instance.TestInstanceAddWithDrbdDisk),
    ]

    for name, func in multinode_tests:
      if qa_config.TestEnabled(name):
        snode = qa_config.AcquireNode(exclude=pnode)
        try:
          instance = RunTest(func, pnode, snode)
          RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, pnode)
          RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, snode)
          RunCommonInstanceTests(instance)
          RunGroupListTests()
          RunTest(qa_group.TestAssignNodesIncludingSplit,
                  constants.INITIAL_NODE_GROUP_NAME,
                  pnode["primary"], snode["primary"])
          if qa_config.TestEnabled("instance-convert-disk"):
            RunTest(qa_instance.TestInstanceShutdown, instance)
            RunTest(qa_instance.TestInstanceConvertDisk, instance, snode)
            RunTest(qa_instance.TestInstanceStartup, instance)
          RunExportImportTests(instance, pnode, snode)
          RunHardwareFailureTests(instance, pnode, snode)
          RunRepairDiskSizes()
          RunTest(qa_instance.TestInstanceRemove, instance)
          del instance
        finally:
          qa_config.ReleaseNode(snode)

    # Test removing instance with offline drbd secondary
    if qa_config.TestEnabled("instance-remove-drbd-offline"):
      snode = qa_config.AcquireNode(exclude=pnode)
      # The secondary node must be released even if creating the instance
      # or bringing the node back online fails
      try:
        instance = \
          qa_instance.TestInstanceAddWithDrbdDisk(pnode, snode)
        try:
          qa_node.MakeNodeOffline(snode, "yes")
          RunTest(qa_instance.TestInstanceRemove, instance)
        finally:
          qa_node.MakeNodeOffline(snode, "no")
      finally:
        qa_config.ReleaseNode(snode)

    if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
      for shutdown in [False, True]:
        instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode)
        expnode = qa_config.AcquireNode(exclude=pnode)
        try:
          if shutdown:
            # Stop instance before exporting and removing it
            RunTest(qa_instance.TestInstanceShutdown, instance)
          RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
          RunTest(qa_instance.TestBackupList, expnode)
        finally:
          qa_config.ReleaseNode(expnode)
        del expnode
        del instance

  finally:
    qa_config.ReleaseNode(pnode)

  RunTestIf("create-cluster", qa_node.TestNodeRemoveAll)

  RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
575
576
@rapi.client.UsesRapiClient
def main():
  """Parses the command line and starts the QA run.

  Exactly one argument, the configuration file, is required. Unless the
  C{--yes-do-it} option is given, only a warning is printed and the
  program exits with status 1.

  """
  parser = optparse.OptionParser(usage="%prog [options] <config-file>")
  parser.add_option("--yes-do-it", dest="yes_do_it",
                    action="store_true",
                    help="Really execute the tests")
  (qa_config.options, args) = parser.parse_args()

  if len(args) != 1:
    # OptionParser.error() prints the message and exits the program
    parser.error("Wrong number of arguments.")

  (config_file, ) = args

  if not qa_config.options.yes_do_it:
    warning = ("Executing this script irreversibly destroys any Ganeti\n"
               "configuration on all nodes involved. If you really want\n"
               "to start testing, supply the --yes-do-it option.")
    print(warning)
    sys.exit(1)

  qa_config.Load(config_file)

  master = qa_config.GetMasterNode()["primary"]
  qa_utils.StartMultiplexer(master)

  master_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand(master, ""))
  print("SSH command for primary node: %s" % master_ssh)
  node_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", ""))
  print("SSH command for other nodes: %s" % node_ssh)

  try:
    RunQa()
  finally:
    # Multiplexers are closed also when the QA run fails
    qa_utils.CloseMultiplexers()


if __name__ == "__main__":
  main()