gnt-cluster verify: consider shared file storage
[ganeti-local] / qa / ganeti-qa.py
1 #!/usr/bin/python -u
2 #
3
4 # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Script for doing QA on Ganeti.
23
24 """
25
26 # pylint: disable=C0103
27 # due to invalid name
28
29 import copy
30 import datetime
31 import optparse
32 import sys
33
34 import qa_cluster
35 import qa_config
36 import qa_daemon
37 import qa_env
38 import qa_error
39 import qa_group
40 import qa_instance
41 import qa_monitoring
42 import qa_network
43 import qa_node
44 import qa_os
45 import qa_job
46 import qa_rapi
47 import qa_tags
48 import qa_utils
49
50 from ganeti import utils
51 from ganeti import rapi # pylint: disable=W0611
52 from ganeti import constants
53 from ganeti import pathutils
54
55 from ganeti.http.auth import ParsePasswordFile
56 import ganeti.rapi.client # pylint: disable=W0611
57 from ganeti.rapi.client import UsesRapiClient
58
59
60 def _FormatHeader(line, end=72):
61   """Fill a line up to the end column.
62
63   """
64   line = "---- " + line + " "
65   line += "-" * (end - len(line))
66   line = line.rstrip()
67   return line
68
69
70 def _DescriptionOf(fn):
71   """Computes the description of an item.
72
73   """
74   if fn.__doc__:
75     desc = fn.__doc__.splitlines()[0].strip()
76   else:
77     desc = "%r" % fn
78
79   return desc.rstrip(".")
80
81
82 def RunTest(fn, *args, **kwargs):
83   """Runs a test after printing a header.
84
85   """
86
87   tstart = datetime.datetime.now()
88
89   desc = _DescriptionOf(fn)
90
91   print
92   print _FormatHeader("%s start %s" % (tstart, desc))
93
94   try:
95     retval = fn(*args, **kwargs)
96     return retval
97   finally:
98     tstop = datetime.datetime.now()
99     tdelta = tstop - tstart
100     print _FormatHeader("%s time=%s %s" % (tstop, tdelta, desc))
101
102
103 def RunTestIf(testnames, fn, *args, **kwargs):
104   """Runs a test conditionally.
105
106   @param testnames: either a single test name in the configuration
107       file, or a list of testnames (which will be AND-ed together)
108
109   """
110   if qa_config.TestEnabled(testnames):
111     RunTest(fn, *args, **kwargs)
112   else:
113     tstart = datetime.datetime.now()
114     desc = _DescriptionOf(fn)
115     # TODO: Formatting test names when non-string names are involved
116     print _FormatHeader("%s skipping %s, test(s) %s disabled" %
117                         (tstart, desc, testnames))
118
119
def RunEnvTests():
  """Runs the environment tests (SSH, ICMP ping, Ganeti commands).

  All of them are controlled by the "env" test name.

  """
  for fn in [
    qa_env.TestSshConnection,
    qa_env.TestIcmpPing,
    qa_env.TestGanetiCommands,
    ]:
    RunTestIf("env", fn)
127
128
def _LookupRapiSecret(rapi_user):
  """Reads the RAPI secret of a user from the master node.

  The RAPI users file is read from the master node and parsed; only clear
  text passwords (with or without the "{CLEARTEXT}" prefix) are accepted.

  @param rapi_user: Login user
  @return: Login secret for the user
  @raise qa_error.Error: if the user is not in the RAPI users file or its
      password is stored using another schema

  """
  cleartext_prefix = "{CLEARTEXT}"

  master = qa_config.GetMasterNode()
  users_path = qa_utils.MakeNodePath(master, pathutils.RAPI_USERS_FILE)
  cat_cmd = utils.ShellQuoteArgs(["cat", users_path])
  users = ParsePasswordFile(qa_utils.GetCommandOutput(master.primary,
                                                      cat_cmd))

  entry = users.get(rapi_user)
  if not entry:
    raise qa_error.Error("User %s not found in RAPI users file" % rapi_user)

  secret = entry.password
  if secret.upper().startswith(cleartext_prefix):
    # The schema prefix is matched case-insensitively and stripped
    return secret[len(cleartext_prefix):]

  if secret.startswith("{"):
    raise qa_error.Error("Unsupported password schema for RAPI user %s:"
                         " not a clear text password" % rapi_user)

  return secret
152
153
def SetupCluster(rapi_user):
  """Initializes the cluster.

  If the "create-cluster" test is enabled, the cluster is initialized with
  a freshly generated RAPI secret and all nodes are added. Otherwise the
  cluster and its nodes are taken as already existing, and (if RAPI is
  enabled) the secret is looked up in the RAPI users file on the master.
  A number of listing tests are run along the way.

  @param rapi_user: Login user for RAPI
  @return: Login secret for RAPI

  """
  # Secret used when the cluster is created by this run; replaced further
  # down if an existing cluster is used and RAPI is enabled
  rapi_secret = utils.GenerateSecret()
  RunTestIf("create-cluster", qa_cluster.TestClusterInit,
            rapi_user, rapi_secret)
  if not qa_config.TestEnabled("create-cluster"):
    # If the cluster is already in place, we assume that exclusive-storage is
    # already set according to the configuration
    qa_config.SetExclusiveStorage(qa_config.get("exclusive-storage", False))
    if qa_rapi.Enabled():
      # To support RAPI on an existing cluster we have to find out the secret
      rapi_secret = _LookupRapiSecret(rapi_user)

  # Test on empty cluster
  RunTestIf("node-list", qa_node.TestNodeList)
  RunTestIf("instance-list", qa_instance.TestInstanceList)
  RunTestIf("job-list", qa_job.TestJobList)

  RunTestIf("create-cluster", qa_node.TestNodeAddAll)
  if not qa_config.TestEnabled("create-cluster"):
    # consider the nodes are already there
    qa_node.MarkNodeAddedAll()

  RunTestIf("test-jobqueue", qa_cluster.TestJobqueue)

  # enable the watcher (unconditionally)
  RunTest(qa_daemon.TestResumeWatcher)

  # List the nodes again, now that they have been added (or marked as added)
  RunTestIf("node-list", qa_node.TestNodeList)

  # Test listing fields
  RunTestIf("node-list", qa_node.TestNodeListFields)
  RunTestIf("instance-list", qa_instance.TestInstanceListFields)
  RunTestIf("job-list", qa_job.TestJobListFields)
  RunTestIf("instance-export", qa_instance.TestBackupListFields)

  RunTestIf("node-info", qa_node.TestNodeInfo)

  return rapi_secret
198
199
def RunClusterTests():
  """Runs the gnt-cluster tests and the basic RAPI cluster tests.

  Each entry of the table below pairs the condition under which a test is
  enabled with the test function; the tests are run in table order.

  """
  cluster_tests = [
    ("create-cluster", qa_cluster.TestClusterInitDisk),
    ("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto),
    ("cluster-verify", qa_cluster.TestClusterVerify),
    ("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs),
    # TODO: add more cluster modify tests
    ("cluster-modify", qa_cluster.TestClusterModifyEmpty),
    ("cluster-modify", qa_cluster.TestClusterModifyIPolicy),
    ("cluster-modify", qa_cluster.TestClusterModifyISpecs),
    ("cluster-modify", qa_cluster.TestClusterModifyBe),
    ("cluster-modify", qa_cluster.TestClusterModifyDisk),
    ("cluster-modify", qa_cluster.TestClusterModifyDiskTemplates),
    ("cluster-rename", qa_cluster.TestClusterRename),
    ("cluster-info", qa_cluster.TestClusterVersion),
    ("cluster-info", qa_cluster.TestClusterInfo),
    ("cluster-info", qa_cluster.TestClusterGetmaster),
    ("cluster-redist-conf", qa_cluster.TestClusterRedistConf),
    (["cluster-copyfile", qa_config.NoVirtualCluster],
     qa_cluster.TestClusterCopyfile),
    ("cluster-command", qa_cluster.TestClusterCommand),
    ("cluster-burnin", qa_cluster.TestClusterBurnin),
    ("cluster-master-failover", qa_cluster.TestClusterMasterFailover),
    ("cluster-master-failover",
     qa_cluster.TestClusterMasterFailoverWithDrainedQueue),
    (["cluster-oob", qa_config.NoVirtualCluster],
     qa_cluster.TestClusterOob),
    (qa_rapi.Enabled, qa_rapi.TestVersion),
    (qa_rapi.Enabled, qa_rapi.TestEmptyCluster),
    (qa_rapi.Enabled, qa_rapi.TestRapiQuery),
    ]

  for (condition, test_fn) in cluster_tests:
    RunTestIf(condition, test_fn)
235
236
def RunRepairDiskSizes():
  """Runs the disk size repair test, if "cluster-repair-disk-sizes" is on.

  """
  repair_test = qa_cluster.TestClusterRepairDiskSizes
  RunTestIf("cluster-repair-disk-sizes", repair_test)
242
243
def RunOsTests():
  """Runs all tests related to gnt-os.

  The tests require the "os" test to be enabled and
  C{qa_config.NoVirtualCluster} to hold. Some of them additionally receive
  the RAPI function for listing operating systems, or C{None} if RAPI is
  not enabled.

  """
  os_enabled = ["os", qa_config.NoVirtualCluster]

  rapi_getos = None
  if qa_config.TestEnabled(qa_rapi.Enabled):
    rapi_getos = qa_rapi.GetOperatingSystems

  # (test function, extra positional arguments), in execution order
  os_tests = [
    (qa_os.TestOsList, ()),
    (qa_os.TestOsDiagnose, ()),
    (qa_os.TestOsValid, (rapi_getos, )),
    (qa_os.TestOsInvalid, (rapi_getos, )),
    (qa_os.TestOsPartiallyValid, (rapi_getos, )),
    (qa_os.TestOsModifyValid, ()),
    (qa_os.TestOsModifyInvalid, ()),
    (qa_os.TestOsStatesNonExisting, ()),
    ]

  for (test_fn, extra_args) in os_tests:
    RunTestIf(os_enabled, test_fn, *extra_args)
274
275
def RunCommonInstanceTests(instance, inst_nodes):
  """Runs a few tests that are common to all disk types.

  The tests are run in a fixed order: first those needing a running
  instance, then the instance is shut down for the tests listed in
  C{DOWN_TESTS}, and finally it is started again for the remaining ones.

  @param instance: instance to run the tests on
  @param inst_nodes: nodes of the instance; only passed on to the broken
      DRBD verification test, which is run for DRBD8 instances

  """
  RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance)
  RunTestIf(["instance-shutdown", "instance-console", qa_rapi.Enabled],
            qa_rapi.TestRapiStoppedInstanceConsole, instance)
  RunTestIf(["instance-shutdown", "instance-modify"],
            qa_instance.TestInstanceStoppedModify, instance)
  RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)

  # Test shutdown/start via RAPI
  RunTestIf(["instance-shutdown", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceShutdown, instance)
  RunTestIf(["instance-shutdown", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceStartup, instance)

  RunTestIf("instance-list", qa_instance.TestInstanceList)

  RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)

  RunTestIf("instance-modify", qa_instance.TestInstanceModify, instance)
  RunTestIf(["instance-modify", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceModify, instance)

  RunTestIf("instance-console", qa_instance.TestInstanceConsole, instance)
  RunTestIf(["instance-console", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceConsole, instance)

  RunTestIf("instance-device-names", qa_instance.TestInstanceDeviceNames,
            instance)
  # Condition used both for the shutdown before and for the startup after
  # the tests below
  DOWN_TESTS = qa_config.Either([
    "instance-reinstall",
    "instance-rename",
    "instance-grow-disk",
    ])

  # shutdown instance for any 'down' tests
  RunTestIf(DOWN_TESTS, qa_instance.TestInstanceShutdown, instance)

  # now run the 'down' state tests
  RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
  RunTestIf(["instance-reinstall", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceReinstall, instance)

  if qa_config.TestEnabled("instance-rename"):
    # A second instance is acquired only to provide the name to rename to
    tgt_instance = qa_config.AcquireInstance()
    try:
      rename_source = instance.name
      rename_target = tgt_instance.name
      # perform instance rename to the same name
      RunTest(qa_instance.TestInstanceRenameAndBack,
              rename_source, rename_source)
      RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceRenameAndBack,
                rename_source, rename_source)
      if rename_target is not None:
        # perform instance rename to a different name, if we have one configured
        RunTest(qa_instance.TestInstanceRenameAndBack,
                rename_source, rename_target)
        RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceRenameAndBack,
                  rename_source, rename_target)
    finally:
      tgt_instance.Release()

  RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)

  # and now start the instance again
  RunTestIf(DOWN_TESTS, qa_instance.TestInstanceStartup, instance)

  RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)

  RunTestIf("tags", qa_tags.TestInstanceTags, instance)

  # The broken-DRBD verification only applies to DRBD8 instances
  if instance.disk_template == constants.DT_DRBD8:
    RunTestIf("cluster-verify",
              qa_cluster.TestClusterVerifyDisksBrokenDRBD, instance, inst_nodes)
  RunTestIf("cluster-verify", qa_cluster.TestClusterVerify)

  RunTestIf(qa_rapi.Enabled, qa_rapi.TestInstance, instance)

  # Lists instances, too
  RunTestIf("node-list", qa_node.TestNodeList)

  # Some jobs have been run, let's test listing them
  RunTestIf("job-list", qa_job.TestJobList)
361
362
def RunCommonNodeTests():
  """Runs the node volume, node storage and out-of-band tests.

  """
  node_tests = [
    ("node-volumes", qa_node.TestNodeVolumes),
    ("node-storage", qa_node.TestNodeStorage),
    (["node-oob", qa_config.NoVirtualCluster], qa_node.TestOutOfBand),
    ]

  for (condition, test_fn) in node_tests:
    RunTestIf(condition, test_fn)
370
371
def RunGroupListTests():
  """Runs the node group listing tests, if "group-list" is enabled.

  """
  for test_fn in [qa_group.TestGroupList, qa_group.TestGroupListFields]:
    RunTestIf("group-list", test_fn)
378
379
def RunNetworkTests():
  """Runs the network management tests, if "network" is enabled.

  """
  for test_fn in [
    qa_network.TestNetworkAddRemove,
    qa_network.TestNetworkConnect,
    ]:
    RunTestIf("network", test_fn)
386
387
def RunGroupRwTests():
  """Runs the tests which add, remove, rename and modify node groups.

  """
  group_tests = [
    ("group-rwops", qa_group.TestGroupAddRemoveRename),
    ("group-rwops", qa_group.TestGroupAddWithOptions),
    ("group-rwops", qa_group.TestGroupModify),
    (["group-rwops", qa_rapi.Enabled], qa_rapi.TestRapiNodeGroups),
    ]
  for (condition, test_fn) in group_tests:
    RunTestIf(condition, test_fn)

  # The default group is looked up only after the tests above have run
  default_group = qa_group.GetDefaultGroup()
  RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags, default_group)
398
399
def RunExportImportTests(instance, inodes):
  """Tries to export and import the instance.

  Both the export/import tests and the inter-cluster instance move are
  skipped for file-based instances (plain and shared file storage).

  @param instance: instance to export
  @type inodes: list of nodes
  @param inodes: current nodes of the instance

  """
  file_templates = [constants.DT_FILE, constants.DT_SHARED_FILE]

  # FIXME: export explicitly bails out on file based storage. other non-lvm
  # based storage types are untested, though. Also note that import could still
  # work, but is deeply embedded into the "export" case.
  if (qa_config.TestEnabled("instance-export") and
      instance.disk_template not in file_templates):
    RunTest(qa_instance.TestInstanceExportNoTarget, instance)

    pnode = inodes[0]
    # The export goes to a node other than the instance's primary node
    expnode = qa_config.AcquireNode(exclude=pnode)
    try:
      name = RunTest(qa_instance.TestInstanceExport, instance, expnode)

      RunTest(qa_instance.TestBackupList, expnode)

      if qa_config.TestEnabled("instance-import"):
        newinst = qa_config.AcquireInstance()
        try:
          RunTest(qa_instance.TestInstanceImport, newinst, pnode,
                  expnode, name)
          # Check if starting the instance works
          RunTest(qa_instance.TestInstanceStartup, newinst)
          RunTest(qa_instance.TestInstanceRemove, newinst)
        finally:
          newinst.Release()
    finally:
      expnode.Release()

  # FIXME: inter-cluster-instance-move crashes on file based instances :/
  # See Issue 414. Shared file storage is file based as well, hence it is
  # excluded in the same way as for the export tests above.
  if (qa_config.TestEnabled([qa_rapi.Enabled, "inter-cluster-instance-move"])
      and instance.disk_template not in file_templates):
    newinst = qa_config.AcquireInstance()
    try:
      tnode = qa_config.AcquireNode(exclude=inodes)
      try:
        RunTest(qa_rapi.TestInterClusterInstanceMove, instance, newinst,
                inodes, tnode)
      finally:
        tnode.Release()
    finally:
      newinst.Release()
449
450
def RunDaemonTests(instance):
  """Test the ganeti-watcher script.

  The watcher is paused before the restart tests and resumed afterwards.

  @param instance: instance used by the restart tests

  """
  RunTest(qa_daemon.TestPauseWatcher)

  watcher_tests = [
    ("instance-automatic-restart", qa_daemon.TestInstanceAutomaticRestart),
    ("instance-consecutive-failures",
     qa_daemon.TestInstanceConsecutiveFailures),
    ]
  for (test_name, test_fn) in watcher_tests:
    RunTestIf(test_name, test_fn, instance)

  RunTest(qa_daemon.TestResumeWatcher)
463
464
def RunHardwareFailureTests(instance, inodes):
  """Test cluster internal hardware failure recovery.

  Runs the failover, migrate, replace-disks and recreate-disks tests on
  the instance and, if the instance has at least two nodes, the node
  evacuate/failover/migrate tests on its first two nodes.

  @param instance: instance to run the tests on
  @param inodes: current nodes of the instance

  """
  RunTestIf("instance-failover", qa_instance.TestInstanceFailover, instance)
  RunTestIf(["instance-failover", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceFailover, instance)

  RunTestIf("instance-migrate", qa_instance.TestInstanceMigrate, instance)
  RunTestIf(["instance-migrate", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceMigrate, instance)

  if qa_config.TestEnabled("instance-replace-disks"):
    # We just need alternative secondary nodes, hence "- 1"
    othernodes = qa_config.AcquireManyNodes(len(inodes) - 1, exclude=inodes)
    try:
      RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceReplaceDisks, instance)
      RunTest(qa_instance.TestReplaceDisks,
              instance, inodes, othernodes)
    finally:
      qa_config.ReleaseManyNodes(othernodes)
    del othernodes

  if qa_config.TestEnabled("instance-recreate-disks"):
    # "acquirednodes" are the nodes to release afterwards, "othernodes" the
    # ones passed to the test; they differ only in the fallback case below
    try:
      acquirednodes = qa_config.AcquireManyNodes(len(inodes), exclude=inodes)
      othernodes = acquirednodes
    except qa_error.OutOfNodesError:
      if len(inodes) > 1:
        # If the cluster is not big enough, let's reuse some of the nodes, but
        # with different roles. In this way, we can test a DRBD instance even on
        # a 3-node cluster.
        acquirednodes = [qa_config.AcquireNode(exclude=inodes)]
        othernodes = acquirednodes + inodes[:-1]
      else:
        raise
    try:
      RunTest(qa_instance.TestRecreateDisks,
              instance, inodes, othernodes)
    finally:
      qa_config.ReleaseManyNodes(acquirednodes)

  # The node-level tests need two nodes of the instance
  if len(inodes) >= 2:
    RunTestIf("node-evacuate", qa_node.TestNodeEvacuate, inodes[0], inodes[1])
    RunTestIf("node-failover", qa_node.TestNodeFailover, inodes[0], inodes[1])
    RunTestIf("node-migrate", qa_node.TestNodeMigrate, inodes[0], inodes[1])
511
512
def RunExclusiveStorageTests():
  """Test exclusive storage.

  Does nothing unless the "cluster-exclusive-storage" test is enabled.
  The value returned by the first C{TestSetExclStorCluster} call is passed
  back to it at the end, but only if all steps in between succeeded.

  """
  if not qa_config.TestEnabled("cluster-exclusive-storage"):
    return

  node = qa_config.AcquireNode()
  try:
    # Remember the returned value so it can be passed back at the end
    old_es = qa_cluster.TestSetExclStorCluster(False)
    qa_node.TestExclStorSingleNode(node)

    qa_cluster.TestSetExclStorCluster(True)
    qa_cluster.TestExclStorSharedPv(node)

    if qa_config.TestEnabled("instance-add-plain-disk"):
      # Make sure that the cluster doesn't have any pre-existing problem
      qa_cluster.AssertClusterVerify()

      # Create and allocate instances
      instance1 = qa_instance.TestInstanceAddWithPlainDisk([node])
      try:
        instance2 = qa_instance.TestInstanceAddWithPlainDisk([node])
        try:
          # cluster-verify checks that disks are allocated correctly
          qa_cluster.AssertClusterVerify()

          # Remove instances
          qa_instance.TestInstanceRemove(instance2)
          qa_instance.TestInstanceRemove(instance1)
        finally:
          instance2.Release()
      finally:
        instance1.Release()

    if qa_config.TestEnabled("instance-add-drbd-disk"):
      snode = qa_config.AcquireNode()
      try:
        # The DRBD instance is created with exclusive storage disabled;
        # after enabling it, cluster verification is expected to fail with
        # the error listed in "exp_err"
        qa_cluster.TestSetExclStorCluster(False)
        instance = qa_instance.TestInstanceAddWithDrbdDisk([node, snode])
        try:
          qa_cluster.TestSetExclStorCluster(True)
          exp_err = [constants.CV_EINSTANCEUNSUITABLENODE]
          qa_cluster.AssertClusterVerify(fail=True, errors=exp_err)
          qa_instance.TestInstanceRemove(instance)
        finally:
          instance.Release()
      finally:
        snode.Release()
    # Restore the setting found at the beginning (success path only)
    qa_cluster.TestSetExclStorCluster(old_es)
  finally:
    node.Release()
563
564
def _BuildSpecDict(par, mn, st, mx):
  """Builds an instance specs dictionary for a single parameter.

  @param par: name of the parameter
  @param mn: value for the parameter in the (only) minimum bound
  @param st: value for the parameter in the standard specs
  @param mx: value for the parameter in the (only) maximum bound
  @return: dictionary with one min/max pair and the standard specs

  """
  bounds = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }

  specs = {}
  specs[constants.ISPECS_MINMAX] = [bounds]
  specs[constants.ISPECS_STD] = {par: st}
  return specs
573
574
def _BuildDoubleSpecDict(index, par, mn, st, mx):
  """Builds an instance specs dictionary with two min/max pairs.

  Only the pair selected by C{index} is filled in; the other one is left
  as an empty dictionary.

  @param index: position (0 or 1) of the min/max pair to fill in
  @param par: name of the parameter
  @param mn: value for the parameter in the minimum bound
  @param st: value for the parameter in the standard specs; if C{None},
      no standard specs are included
  @param mx: value for the parameter in the maximum bound
  @return: the specs dictionary

  """
  minmax_pairs = [{}, {}]
  minmax_pairs[index] = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }

  specs = {constants.ISPECS_MINMAX: minmax_pairs}
  if st is not None:
    specs[constants.ISPECS_STD] = {par: st}
  return specs
586
587
def TestIPolicyPlainInstance():
  """Test instance policy interaction with instances

  A plain-disk instance is created and a sequence of instance policy
  changes is applied to the cluster, checking after each one whether
  cluster verification reports a policy warning. The instance is then
  removed and the same changes are replayed, checking that instance
  creation fails for the changes which caused a warning before.

  Nothing is tested if the plain disk template is not supported.

  """
  params = ["memory-size", "cpu-count", "disk-count", "disk-size", "nic-count"]
  if not qa_config.IsTemplateSupported(constants.DT_PLAIN):
    print "Template %s not supported" % constants.DT_PLAIN
    return

  # This test assumes that the group policy is empty
  (_, old_specs) = qa_cluster.TestClusterSetISpecs()
  # We also assume to have only one min/max bound
  assert len(old_specs[constants.ISPECS_MINMAX]) == 1
  node = qa_config.AcquireNode()
  try:
    # Log of policy changes, list of tuples:
    # (full_change, incremental_change, policy_violated)
    history = []
    instance = qa_instance.TestInstanceAddWithPlainDisk([node])
    try:
      policyerror = [constants.CV_EINSTANCEPOLICY]
      for par in params:
        (iminval, imaxval) = qa_instance.GetInstanceSpec(instance.name, par)
        # All bounds are set above the instance's current maximum value
        # Some specs must be multiple of 4
        new_spec = _BuildSpecDict(par, imaxval + 4, imaxval + 4, imaxval + 4)
        history.append((None, new_spec, True))
        if iminval > 0:
          # The maximum is set below the instance's current minimum value
          # Some specs must be multiple of 4
          if iminval >= 4:
            upper = iminval - 4
          else:
            upper = iminval - 1
          new_spec = _BuildSpecDict(par, 0, upper, upper)
          history.append((None, new_spec, True))
        # Go back to the original specs before changing the next parameter
        history.append((old_specs, None, False))

      # Test with two instance specs
      double_specs = copy.deepcopy(old_specs)
      double_specs[constants.ISPECS_MINMAX] = \
          double_specs[constants.ISPECS_MINMAX] * 2
      (par1, par2) = params[0:2]
      (_, imaxval1) = qa_instance.GetInstanceSpec(instance.name, par1)
      (_, imaxval2) = qa_instance.GetInstanceSpec(instance.name, par2)
      old_minmax = old_specs[constants.ISPECS_MINMAX][0]
      history.extend([
        (double_specs, None, False),
        # The first min/max limit is being violated
        (None,
         _BuildDoubleSpecDict(0, par1, imaxval1 + 4, imaxval1 + 4,
                              imaxval1 + 4),
         False),
        # Both min/max limits are being violated
        (None,
         _BuildDoubleSpecDict(1, par2, imaxval2 + 4, None, imaxval2 + 4),
         True),
        # The second min/max limit is being violated
        (None,
         _BuildDoubleSpecDict(0, par1,
                              old_minmax[constants.ISPECS_MIN][par1],
                              old_specs[constants.ISPECS_STD][par1],
                              old_minmax[constants.ISPECS_MAX][par1]),
         False),
        (old_specs, None, False),
        ])

      # Apply the changes, and check policy violations after each change
      qa_cluster.AssertClusterVerify()
      for (new_specs, diff_specs, failed) in history:
        qa_cluster.TestClusterSetISpecs(new_specs=new_specs,
                                        diff_specs=diff_specs)
        if failed:
          qa_cluster.AssertClusterVerify(warnings=policyerror)
        else:
          qa_cluster.AssertClusterVerify()

      qa_instance.TestInstanceRemove(instance)
    finally:
      instance.Release()

    # Now we replay the same policy changes, and we expect that the instance
    # cannot be created for the cases where we had a policy violation above
    for (new_specs, diff_specs, failed) in history:
      qa_cluster.TestClusterSetISpecs(new_specs=new_specs,
                                      diff_specs=diff_specs)
      if failed:
        qa_instance.TestInstanceAddWithPlainDisk([node], fail=True)
      # Instance creation with no policy violation has been tested already
  finally:
    node.Release()
675
676
677 def IsExclusiveStorageInstanceTestEnabled():
678   test_name = "exclusive-storage-instance-tests"
679   if qa_config.TestEnabled(test_name):
680     vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
681     vgscmd = utils.ShellQuoteArgs([
682       "vgs", "--noheadings", "-o", "pv_count", vgname,
683       ])
684     nodes = qa_config.GetConfig()["nodes"]
685     for node in nodes:
686       try:
687         pvnum = int(qa_utils.GetCommandOutput(node.primary, vgscmd))
688       except Exception, e:
689         msg = ("Cannot get the number of PVs on %s, needed by '%s': %s" %
690                (node.primary, test_name, e))
691         raise qa_error.Error(msg)
692       if pvnum < 2:
693         raise qa_error.Error("Node %s has not enough PVs (%s) to run '%s'" %
694                              (node.primary, pvnum, test_name))
695     res = True
696   else:
697     res = False
698   return res
699
700
def RunInstanceTests():
  """Create and exercise instances.

  For every disk template whose creation test is enabled and which is
  supported, an instance is created on freshly acquired nodes, run through
  the instance test suites and removed again. The cluster is verified
  after each template.

  """
  # Tuples of (test name, disk template, creation function, number of nodes)
  instance_tests = [
    ("instance-add-plain-disk", constants.DT_PLAIN,
     qa_instance.TestInstanceAddWithPlainDisk, 1),
    ("instance-add-drbd-disk", constants.DT_DRBD8,
     qa_instance.TestInstanceAddWithDrbdDisk, 2),
    ("instance-add-diskless", constants.DT_DISKLESS,
     qa_instance.TestInstanceAddDiskless, 1),
    ("instance-add-file", constants.DT_FILE,
     qa_instance.TestInstanceAddFile, 1),
    ("instance-add-shared-file", constants.DT_SHARED_FILE,
     qa_instance.TestInstanceAddSharedFile, 1),
    ]

  for (test_name, templ, create_fun, num_nodes) in instance_tests:
    if (qa_config.TestEnabled(test_name) and
        qa_config.IsTemplateSupported(templ)):
      inodes = qa_config.AcquireManyNodes(num_nodes)
      try:
        instance = RunTest(create_fun, inodes)
        try:
          RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
          RunDaemonTests(instance)
          for node in inodes:
            RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node)
          if len(inodes) > 1:
            RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
                      constants.INITIAL_NODE_GROUP_NAME,
                      inodes[0].primary, inodes[1].primary)
          if qa_config.TestEnabled("instance-convert-disk"):
            RunTest(qa_instance.TestInstanceShutdown, instance)
            RunTest(qa_instance.TestInstanceConvertDiskToPlain,
                    instance, inodes)
            RunTest(qa_instance.TestInstanceStartup, instance)
          RunTestIf("instance-modify-disks",
                    qa_instance.TestInstanceModifyDisks, instance)
          RunCommonInstanceTests(instance, inodes)
          if qa_config.TestEnabled("instance-modify-primary"):
            othernode = qa_config.AcquireNode()
            try:
              RunTest(qa_instance.TestInstanceModifyPrimaryAndBack,
                      instance, inodes[0], othernode)
            finally:
              # Release the node even if the test fails, like all other
              # acquired resources in this file
              othernode.Release()
          RunGroupListTests()
          RunExportImportTests(instance, inodes)
          RunHardwareFailureTests(instance, inodes)
          RunRepairDiskSizes()
          RunTest(qa_instance.TestInstanceRemove, instance)
        finally:
          instance.Release()
        del instance
      finally:
        qa_config.ReleaseManyNodes(inodes)
      qa_cluster.AssertClusterVerify()
755
756
def RunMonitoringTests():
  """Runs the instance status collector test, if "mon-collector" is on.

  Unlike with L{RunTestIf}, nothing is printed if the test is disabled.

  """
  if not qa_config.TestEnabled("mon-collector"):
    return

  RunTest(qa_monitoring.TestInstStatusCollector)
760
761
def RunQa():
  """Main QA body.

  Runs, in this order: the environment tests, cluster setup, the cluster,
  OS, tag, node, group and network tests, tests on a non-master node, RAPI
  node and instance tests, the instance tests for every enabled
  configuration, the export-with-remove tests, the exclusive storage and
  instance policy tests, the test removing an instance with an offline
  DRBD secondary, the monitoring tests and finally node removal and
  cluster destruction.

  """
  rapi_user = "ganeti-qa"

  RunEnvTests()
  rapi_secret = SetupCluster(rapi_user)

  if qa_rapi.Enabled():
    # Load RAPI certificate
    qa_rapi.Setup(rapi_user, rapi_secret)

  RunClusterTests()
  RunOsTests()

  RunTestIf("tags", qa_tags.TestClusterTags)

  RunCommonNodeTests()
  RunGroupListTests()
  RunGroupRwTests()
  RunNetworkTests()

  # The master shouldn't be readded or put offline; "delay" needs a non-master
  # node to test
  pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
  try:
    RunTestIf("node-readd", qa_node.TestNodeReadd, pnode)
    RunTestIf("node-modify", qa_node.TestNodeModify, pnode)
    RunTestIf("delay", qa_cluster.TestDelay, pnode)
  finally:
    pnode.Release()

  # Make sure the cluster is clean before running instance tests
  qa_cluster.AssertClusterVerify()

  pnode = qa_config.AcquireNode()
  try:
    RunTestIf("tags", qa_tags.TestNodeTags, pnode)

    if qa_rapi.Enabled():
      RunTest(qa_rapi.TestNode, pnode)

      if qa_config.TestEnabled("instance-add-plain-disk"):
        # The instance is added and removed twice, once for each value of
        # "use_client"
        for use_client in [True, False]:
          rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
                                  use_client)
          try:
            if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
              RunCommonInstanceTests(rapi_instance, [pnode])
            RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
          finally:
            rapi_instance.Release()
          del rapi_instance

  finally:
    pnode.Release()

  # List of (condition, setup function, restore function); the instance
  # tests are run once for each entry whose condition is enabled, and the
  # value returned by the setup function is passed to the restore function
  config_list = [
    ("default-instance-tests", lambda: None, lambda _: None),
    (IsExclusiveStorageInstanceTestEnabled,
     lambda: qa_cluster.TestSetExclStorCluster(True),
     qa_cluster.TestSetExclStorCluster),
  ]
  for (conf_name, setup_conf_f, restore_conf_f) in config_list:
    if qa_config.TestEnabled(conf_name):
      oldconf = setup_conf_f()
      # NOTE(review): the configuration is not restored if the instance
      # tests raise an exception -- confirm that this is intended
      RunInstanceTests()
      restore_conf_f(oldconf)

  pnode = qa_config.AcquireNode()
  try:
    if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
      # Export with removal is tested on a running and on a stopped instance
      for shutdown in [False, True]:
        instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, [pnode])
        try:
          expnode = qa_config.AcquireNode(exclude=pnode)
          try:
            if shutdown:
              # Stop instance before exporting and removing it
              RunTest(qa_instance.TestInstanceShutdown, instance)
            RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
            RunTest(qa_instance.TestBackupList, expnode)
          finally:
            expnode.Release()
        finally:
          instance.Release()
        del expnode
        del instance
      qa_cluster.AssertClusterVerify()

  finally:
    pnode.Release()

  RunExclusiveStorageTests()
  RunTestIf(["cluster-instance-policy", "instance-add-plain-disk"],
            TestIPolicyPlainInstance)

  RunTestIf(
    "instance-add-restricted-by-disktemplates",
    qa_instance.TestInstanceCreationRestrictedByDiskTemplates)

  # Test removing instance with offline drbd secondary
  if qa_config.TestEnabled(["instance-remove-drbd-offline",
                            "instance-add-drbd-disk"]):
    # Make sure the master is not put offline
    snode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
    try:
      pnode = qa_config.AcquireNode(exclude=snode)
      try:
        # NOTE(review): unlike the other instances in this function, this
        # one is not released here -- confirm whether the test takes care
        # of that
        instance = qa_instance.TestInstanceAddWithDrbdDisk([pnode, snode])
        set_offline = lambda node: qa_node.MakeNodeOffline(node, "yes")
        set_online = lambda node: qa_node.MakeNodeOffline(node, "no")
        RunTest(qa_instance.TestRemoveInstanceOfflineNode, instance, snode,
                set_offline, set_online)
      finally:
        pnode.Release()
    finally:
      snode.Release()
    qa_cluster.AssertClusterVerify()

  RunMonitoringTests()

  RunTestIf("create-cluster", qa_node.TestNodeRemoveAll)

  RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
888
889
@UsesRapiClient
def main():
  """Main program.

  Parses the command line, refuses to run without "--yes-do-it", loads
  the QA configuration and runs the QA suite; the SSH multiplexers are
  closed at the end even if a test fails.

  """
  parser = optparse.OptionParser(usage="%prog [options] <config-file>")
  parser.add_option("--yes-do-it", dest="yes_do_it",
                    action="store_true",
                    help="Really execute the tests")
  (opts, args) = parser.parse_args()

  if len(args) != 1:
    # parser.error() prints the message and exits the program
    parser.error("Wrong number of arguments.")
  (config_file, ) = args

  if not opts.yes_do_it:
    print ("Executing this script irreversibly destroys any Ganeti\n"
           "configuration on all nodes involved. If you really want\n"
           "to start testing, supply the --yes-do-it option.")
    sys.exit(1)

  qa_config.Load(config_file)

  master_name = qa_config.GetMasterNode().primary
  qa_utils.StartMultiplexer(master_name)

  primary_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand(master_name, ""))
  print ("SSH command for primary node: %s" % primary_ssh)
  other_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", ""))
  print ("SSH command for other nodes: %s" % other_ssh)

  try:
    RunQa()
  finally:
    qa_utils.CloseMultiplexers()
924
# Run the QA suite only when executed as a script, not when imported
if __name__ == "__main__":
  main()