gnt-instance: Adding instance policy to failover
[ganeti-local] / qa / ganeti-qa.py
1 #!/usr/bin/python -u
2 #
3
4 # Copyright (C) 2007, 2008, 2009, 2010, 2011 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Script for doing QA on Ganeti.
23
24 """
25
26 # pylint: disable=C0103
27 # due to invalid name
28
29 import sys
30 import datetime
31 import optparse
32
33 import qa_cluster
34 import qa_config
35 import qa_daemon
36 import qa_env
37 import qa_group
38 import qa_instance
39 import qa_node
40 import qa_os
41 import qa_rapi
42 import qa_tags
43 import qa_utils
44
45 from ganeti import utils
46 from ganeti import rapi
47 from ganeti import constants
48
49 import ganeti.rapi.client # pylint: disable=W0611
50
51
52 def _FormatHeader(line, end=72):
53   """Fill a line up to the end column.
54
55   """
56   line = "---- " + line + " "
57   line += "-" * (end - len(line))
58   line = line.rstrip()
59   return line
60
61
62 def _DescriptionOf(fn):
63   """Computes the description of an item.
64
65   """
66   if fn.__doc__:
67     desc = fn.__doc__.splitlines()[0].strip()
68   else:
69     desc = "%r" % fn
70
71   return desc.rstrip(".")
72
73
def RunTest(fn, *args):
  """Executes a test function, framing its output with header lines.

  A header containing the start time is printed before the call and
  another one with the end time and the duration afterwards; the latter
  is printed even if the test raises an exception.

  @param fn: callable implementing the test
  @param args: positional arguments passed on to C{fn}
  @return: whatever C{fn} returns

  """
  started = datetime.datetime.now()
  description = _DescriptionOf(fn)

  # Empty line separating this test's output from whatever came before
  print("")
  print(_FormatHeader("%s start %s" % (started, description)))

  try:
    return fn(*args)
  finally:
    finished = datetime.datetime.now()
    print(_FormatHeader("%s time=%s %s" %
                        (finished, finished - started, description)))
93
94
def RunTestIf(testnames, fn, *args):
  """Runs a test only if it is enabled in the QA configuration.

  For disabled tests a header line noting the skip is printed instead
  of running anything.

  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)
  @param fn: callable implementing the test
  @param args: positional arguments passed on to C{fn}

  """
  if not qa_config.TestEnabled(testnames):
    skipped_at = datetime.datetime.now()
    description = _DescriptionOf(fn)
    message = ("%s skipping %s, test(s) %s disabled" %
               (skipped_at, description, testnames))
    print(_FormatHeader(message))
    return

  RunTest(fn, *args)
109
110
def RunEnvTests():
  """Runs the tests checking the QA environment.

  All of them are controlled by the "env" test name.

  """
  env_tests = (
    qa_env.TestSshConnection,
    qa_env.TestIcmpPing,
    qa_env.TestGanetiCommands,
    )

  for fn in env_tests:
    RunTestIf("env", fn)
118
119
def SetupCluster(rapi_user, rapi_secret):
  """Prepares the cluster for the remaining QA run.

  Depending on the enabled tests this initializes the cluster and adds
  all nodes, or just marks the nodes as added. The watcher is resumed
  in any case and a number of listing tests are run along the way.

  @param rapi_user: Login user for RAPI
  @param rapi_secret: Login secret for RAPI

  """
  RunTestIf("create-cluster", qa_cluster.TestClusterInit,
            rapi_user, rapi_secret)

  # Listing tests on the still empty cluster
  for (testname, fn) in [
    ("node-list", qa_node.TestNodeList),
    ("instance-list", qa_instance.TestInstanceList),
    ]:
    RunTestIf(testname, fn)

  RunTestIf("create-cluster", qa_node.TestNodeAddAll)
  if not qa_config.TestEnabled("create-cluster"):
    # Nodes were not added by this run, consider them to be there already
    qa_node.MarkNodeAddedAll()

  RunTestIf("test-jobqueue", qa_cluster.TestJobqueue)

  # The watcher is enabled regardless of the test configuration
  RunTest(qa_daemon.TestResumeWatcher)

  # Node listing again, followed by the tests for listing fields and
  # node information
  for (testname, fn) in [
    ("node-list", qa_node.TestNodeList),
    ("node-list", qa_node.TestNodeListFields),
    ("instance-list", qa_instance.TestInstanceListFields),
    ("node-info", qa_node.TestNodeInfo),
    ]:
    RunTestIf(testname, fn)
151
152
def RunClusterTests():
  """Runs the cluster-level tests, including the basic RAPI ones.

  Each entry pairs the name enabling the test in the QA configuration
  with the function implementing it; entries are run in order.

  """
  cluster_tests = [
    ("create-cluster", qa_cluster.TestClusterInitDisk),
    ("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto),
    ("cluster-verify", qa_cluster.TestClusterVerify),
    ("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs),
    # TODO: add more cluster modify tests
    ("cluster-modify", qa_cluster.TestClusterModifyBe),
    ("cluster-modify", qa_cluster.TestClusterModifyDisk),
    ("cluster-rename", qa_cluster.TestClusterRename),
    ("cluster-info", qa_cluster.TestClusterVersion),
    ("cluster-info", qa_cluster.TestClusterInfo),
    ("cluster-info", qa_cluster.TestClusterGetmaster),
    ("cluster-redist-conf", qa_cluster.TestClusterRedistConf),
    ("cluster-copyfile", qa_cluster.TestClusterCopyfile),
    ("cluster-command", qa_cluster.TestClusterCommand),
    ("cluster-burnin", qa_cluster.TestClusterBurnin),
    ("cluster-master-failover", qa_cluster.TestClusterMasterFailover),
    ("cluster-master-failover",
     qa_cluster.TestClusterMasterFailoverWithDrainedQueue),
    ("cluster-oob", qa_cluster.TestClusterOob),
    ("rapi", qa_rapi.TestVersion),
    ("rapi", qa_rapi.TestEmptyCluster),
    ("rapi", qa_rapi.TestRapiQuery),
    ]

  for (testname, testfn) in cluster_tests:
    RunTestIf(testname, testfn)
182
183
def RunRepairDiskSizes():
  """Runs the cluster disk size repair test.

  The test is only executed if "cluster-repair-disk-sizes" is enabled.

  """
  testfn = qa_cluster.TestClusterRepairDiskSizes
  RunTestIf("cluster-repair-disk-sizes", testfn)
189
190
def RunOsTests():
  """Runs the operating system tests, all enabled by the "os" name.

  Some of the tests additionally receive the RAPI function for
  retrieving operating systems, or C{None} if RAPI tests are disabled.

  """
  rapi_getos = None
  if qa_config.TestEnabled("rapi"):
    rapi_getos = qa_rapi.GetOperatingSystems

  # Test functions with their extra arguments, in execution order
  os_tests = [
    (qa_os.TestOsList, ()),
    (qa_os.TestOsDiagnose, ()),
    (qa_os.TestOsValid, (rapi_getos, )),
    (qa_os.TestOsInvalid, (rapi_getos, )),
    (qa_os.TestOsPartiallyValid, (rapi_getos, )),
    (qa_os.TestOsModifyValid, ()),
    (qa_os.TestOsModifyInvalid, ()),
    (qa_os.TestOsStatesNonExisting, ()),
    ]

  for (fn, fn_args) in os_tests:
    RunTestIf("os", fn, *fn_args)
219
220
def RunCommonInstanceTests(instance):
  """Runs the instance tests which do not depend on the disk type.

  @param instance: instance to run the tests on; its C{"name"} entry is
      used for the rename tests

  """
  # (test names, test function, arguments) run before the rename tests
  tests_before_rename = [
    ("instance-shutdown", qa_instance.TestInstanceShutdown, (instance, )),
    (["instance-shutdown", "instance-console", "rapi"],
     qa_rapi.TestRapiStoppedInstanceConsole, (instance, )),
    ("instance-shutdown", qa_instance.TestInstanceStartup, (instance, )),

    # Shutdown/start via RAPI
    (["instance-shutdown", "rapi"],
     qa_rapi.TestRapiInstanceShutdown, (instance, )),
    (["instance-shutdown", "rapi"],
     qa_rapi.TestRapiInstanceStartup, (instance, )),

    ("instance-list", qa_instance.TestInstanceList, ()),

    ("instance-info", qa_instance.TestInstanceInfo, (instance, )),

    ("instance-modify", qa_instance.TestInstanceModify, (instance, )),
    (["instance-modify", "rapi"],
     qa_rapi.TestRapiInstanceModify, (instance, )),

    ("instance-console", qa_instance.TestInstanceConsole, (instance, )),
    (["instance-console", "rapi"],
     qa_rapi.TestRapiInstanceConsole, (instance, )),

    ("instance-reinstall", qa_instance.TestInstanceShutdown, (instance, )),
    ("instance-reinstall", qa_instance.TestInstanceReinstall, (instance, )),
    (["instance-reinstall", "rapi"],
     qa_rapi.TestRapiInstanceReinstall, (instance, )),
    ("instance-reinstall", qa_instance.TestInstanceStartup, (instance, )),

    ("instance-reboot", qa_instance.TestInstanceReboot, (instance, )),
    ]

  for (testnames, fn, fn_args) in tests_before_rename:
    RunTestIf(testnames, fn, *fn_args)

  if qa_config.TestEnabled("instance-rename"):
    current_name = instance["name"]
    other_name = qa_config.get("rename", None)

    RunTest(qa_instance.TestInstanceShutdown, instance)

    # Renaming the instance to the name it already has
    RunTest(qa_instance.TestInstanceRename, current_name, current_name)
    RunTestIf("rapi", qa_rapi.TestRapiInstanceRename,
              current_name, current_name)

    if other_name is not None:
      # A different name is configured: rename there and back again,
      # first using the command line tests and then via RAPI
      there_and_back = [
        (current_name, other_name),
        (other_name, current_name),
        ]
      for (old_name, new_name) in there_and_back:
        RunTest(qa_instance.TestInstanceRename, old_name, new_name)
      for (old_name, new_name) in there_and_back:
        RunTestIf("rapi", qa_rapi.TestRapiInstanceRename,
                  old_name, new_name)

    RunTest(qa_instance.TestInstanceStartup, instance)

  tests_after_rename = [
    ("tags", qa_tags.TestInstanceTags, (instance, )),
    ("cluster-verify", qa_cluster.TestClusterVerify, ()),
    ("rapi", qa_rapi.TestInstance, (instance, )),
    # Lists instances, too
    ("node-list", qa_node.TestNodeList, ()),
    ]

  for (testnames, fn, fn_args) in tests_after_rename:
    RunTestIf(testnames, fn, *fn_args)
282
283
def RunCommonNodeTests():
  """Runs the node tests for volumes, storage and out-of-band.

  Each test is enabled by its own name in the QA configuration.

  """
  node_tests = [
    ("node-volumes", qa_node.TestNodeVolumes),
    ("node-storage", qa_node.TestNodeStorage),
    ("node-oob", qa_node.TestOutOfBand),
    ]

  for (testname, fn) in node_tests:
    RunTestIf(testname, fn)
291
292
def RunGroupListTests():
  """Runs the node group listing tests, enabled by "group-list".

  """
  for fn in (qa_group.TestGroupList, qa_group.TestGroupListFields):
    RunTestIf("group-list", fn)
299
300
def RunGroupRwTests():
  """Runs the tests modifying node groups.

  All of them require "group-rwops" to be enabled; the RAPI and tag
  tests additionally need "rapi" or "tags", respectively.

  """
  rw_tests = (
    qa_group.TestGroupAddRemoveRename,
    qa_group.TestGroupAddWithOptions,
    qa_group.TestGroupModify,
    )
  for fn in rw_tests:
    RunTestIf("group-rwops", fn)

  RunTestIf(["group-rwops", "rapi"], qa_rapi.TestRapiNodeGroups)

  # The default group is looked up only after the tests above have run
  default_group = qa_group.GetDefaultGroup()
  RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags, default_group)
311
312
def RunExportImportTests(instance, pnode, snode):
  """Runs the export, import and inter-cluster move tests.

  Nodes and instances acquired from the QA configuration for these
  tests are released again, even if a test fails.

  @param instance: instance to export or move
  @param pnode: current primary node of the instance
  @param snode: current secondary node of the instance, if any,
      otherwise None

  """
  if qa_config.TestEnabled("instance-export"):
    RunTest(qa_instance.TestInstanceExportNoTarget, instance)

    # The export goes to a node other than the primary
    export_node = qa_config.AcquireNode(exclude=pnode)
    try:
      export_name = RunTest(qa_instance.TestInstanceExport,
                            instance, export_node)

      RunTest(qa_instance.TestBackupList, export_node)

      if qa_config.TestEnabled("instance-import"):
        imported = qa_config.AcquireInstance()
        try:
          RunTest(qa_instance.TestInstanceImport, pnode, imported,
                  export_node, export_name)
          RunTest(qa_instance.TestInstanceRemove, imported)
        finally:
          qa_config.ReleaseInstance(imported)
    finally:
      qa_config.ReleaseNode(export_node)

  if qa_config.TestEnabled(["rapi", "inter-cluster-instance-move"]):
    moved = qa_config.AcquireInstance()
    try:
      # The target must be neither of the instance's current nodes
      excluded = [pnode]
      if snode is not None:
        excluded.append(snode)

      target_node = qa_config.AcquireNode(exclude=excluded)
      try:
        RunTest(qa_rapi.TestInterClusterInstanceMove, instance, moved,
                pnode, snode, target_node)
      finally:
        qa_config.ReleaseNode(target_node)
    finally:
      qa_config.ReleaseInstance(moved)
356
357
def RunDaemonTests(instance):
  """Runs the watcher related tests on an instance.

  The watcher is paused before and resumed after the tests; both steps
  are run unconditionally.

  @param instance: instance used by the restart and failure tests

  """
  RunTest(qa_daemon.TestPauseWatcher)

  for (testname, fn) in [
    ("instance-automatic-restart",
     qa_daemon.TestInstanceAutomaticRestart),
    ("instance-consecutive-failures",
     qa_daemon.TestInstanceConsecutiveFailures),
    ]:
    RunTestIf(testname, fn, instance)

  RunTest(qa_daemon.TestResumeWatcher)
370
371
def RunHardwareFailureTests(instance, pnode, snode):
  """Runs the failover, migration, disk replacement and failure tests.

  @param instance: instance to run the tests on
  @param pnode: primary node passed on to the tests
  @param snode: secondary node passed on to the tests

  """
  for (testnames, fn) in [
    ("instance-failover", qa_instance.TestInstanceFailover),
    (["instance-failover", "rapi"], qa_rapi.TestRapiInstanceFailover),
    ("instance-migrate", qa_instance.TestInstanceMigrate),
    (["instance-migrate", "rapi"], qa_rapi.TestRapiInstanceMigrate),
    ]:
    RunTestIf(testnames, fn, instance)

  if qa_config.TestEnabled("instance-replace-disks"):
    # A further node, different from both nodes given, is needed
    spare_node = qa_config.AcquireNode(exclude=[pnode, snode])
    try:
      RunTestIf("rapi", qa_rapi.TestRapiInstanceReplaceDisks, instance)
      RunTest(qa_instance.TestReplaceDisks,
              instance, pnode, snode, spare_node)
    finally:
      qa_config.ReleaseNode(spare_node)

  for (testnames, fn) in [
    ("node-evacuate", qa_node.TestNodeEvacuate),
    ("node-failover", qa_node.TestNodeFailover),
    ]:
    RunTestIf(testnames, fn, pnode, snode)

  for fn in [
    qa_instance.TestInstanceMasterDiskFailure,
    qa_instance.TestInstanceSecondaryDiskFailure,
    ]:
    RunTestIf("instance-disk-failure", fn, instance, pnode, snode)
402
403
def RunQa():
  """Main QA body.

  Runs the environment tests, sets up the cluster and then executes the
  cluster, OS, tag, node, group and instance test suites in a fixed
  order. Nodes acquired from the QA configuration are released again in
  C{finally} clauses. At the end the nodes are removed and the cluster
  is destroyed, if the respective tests are enabled.

  """
  rapi_user = "ganeti-qa"
  # The RAPI secret is generated anew for every run
  rapi_secret = utils.GenerateSecret()

  RunEnvTests()
  SetupCluster(rapi_user, rapi_secret)

  # Load RAPI certificate
  qa_rapi.Setup(rapi_user, rapi_secret)

  RunClusterTests()
  RunOsTests()

  RunTestIf("tags", qa_tags.TestClusterTags)

  RunCommonNodeTests()
  RunGroupListTests()
  RunGroupRwTests()

  # These node tests use a node other than the master node
  pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
  try:
    RunTestIf("node-readd", qa_node.TestNodeReadd, pnode)
    RunTestIf("node-modify", qa_node.TestNodeModify, pnode)
    RunTestIf("delay", qa_cluster.TestDelay, pnode)
  finally:
    qa_config.ReleaseNode(pnode)

  # Node used as primary node for all instance tests below
  pnode = qa_config.AcquireNode()
  try:
    RunTestIf("tags", qa_tags.TestNodeTags, pnode)

    if qa_rapi.Enabled():
      RunTest(qa_rapi.TestNode, pnode)

      if qa_config.TestEnabled("instance-add-plain-disk"):
        # Add and remove an instance via RAPI, once for each value of
        # use_client
        for use_client in [True, False]:
          rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
                                  use_client)
          RunCommonInstanceTests(rapi_instance)
          RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
          # The removal test has run, drop the local name
          del rapi_instance

    # Tests on an instance with a plain disk; there is no secondary node
    if qa_config.TestEnabled("instance-add-plain-disk"):
      instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode)
      RunCommonInstanceTests(instance)
      RunGroupListTests()
      RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
      RunExportImportTests(instance, pnode, None)
      RunDaemonTests(instance)
      RunRepairDiskSizes()
      RunTest(qa_instance.TestInstanceRemove, instance)
      del instance

    # Instance creation tests which need a secondary node, as pairs of
    # (test name, creation function)
    multinode_tests = [
      ("instance-add-drbd-disk",
       qa_instance.TestInstanceAddWithDrbdDisk),
    ]

    for name, func in multinode_tests:
      if qa_config.TestEnabled(name):
        # The secondary node must differ from the primary node
        snode = qa_config.AcquireNode(exclude=pnode)
        try:
          instance = RunTest(func, pnode, snode)
          RunCommonInstanceTests(instance)
          RunGroupListTests()
          RunTest(qa_group.TestAssignNodesIncludingSplit,
                  constants.INITIAL_NODE_GROUP_NAME,
                  pnode["primary"], snode["primary"])
          if qa_config.TestEnabled("instance-convert-disk"):
            # The instance is shut down for the conversion and started
            # again afterwards
            RunTest(qa_instance.TestInstanceShutdown, instance)
            RunTest(qa_instance.TestInstanceConvertDisk, instance, snode)
            RunTest(qa_instance.TestInstanceStartup, instance)
          RunExportImportTests(instance, pnode, snode)
          RunHardwareFailureTests(instance, pnode, snode)
          RunRepairDiskSizes()
          RunTest(qa_instance.TestInstanceRemove, instance)
          del instance
        finally:
          qa_config.ReleaseNode(snode)

    if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
      # Export with removal, once with the instance as created and once
      # after shutting it down first
      for shutdown in [False, True]:
        instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode)
        expnode = qa_config.AcquireNode(exclude=pnode)
        try:
          if shutdown:
            # Stop instance before exporting and removing it
            RunTest(qa_instance.TestInstanceShutdown, instance)
          RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
          RunTest(qa_instance.TestBackupList, expnode)
        finally:
          qa_config.ReleaseNode(expnode)
        del expnode
        del instance

  finally:
    qa_config.ReleaseNode(pnode)

  # Final teardown, each step only if enabled in the configuration
  RunTestIf("create-cluster", qa_node.TestNodeRemoveAll)

  RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
508
509
@rapi.client.UsesRapiClient
def main():
  """Entry point of the QA script.

  Parses the command line, refuses to do anything unless --yes-do-it
  was given, loads the configuration file and runs the QA with a
  multiplexer started for the master node. The multiplexers are closed
  again even if the QA run fails.

  """
  parser = optparse.OptionParser(usage="%prog [options] <config-file>")
  parser.add_option("--yes-do-it", dest="yes_do_it", action="store_true",
                    help="Really execute the tests")
  (qa_config.options, args) = parser.parse_args()

  if len(args) != 1:
    # OptionParser.error() prints the message and exits the program
    parser.error("Wrong number of arguments.")
  (config_file, ) = args

  if not qa_config.options.yes_do_it:
    warning = ("Executing this script irreversibly destroys any Ganeti\n"
               "configuration on all nodes involved. If you really want\n"
               "to start testing, supply the --yes-do-it option.")
    print(warning)
    sys.exit(1)

  qa_config.Load(config_file)

  master_node = qa_config.GetMasterNode()
  qa_utils.StartMultiplexer(master_node["primary"])
  try:
    RunQa()
  finally:
    qa_utils.CloseMultiplexers()
537   finally:
538     qa_utils.CloseMultiplexers()
539
# Run the QA only when executed as a script, not when imported as a module
if __name__ == "__main__":
  main()