--select-instances hbal manpage update
[ganeti-local] / qa / qa_cluster.py
1 #
2 #
3
4 # Copyright (C) 2007, 2010, 2011 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Cluster related QA tests.
23
24 """
25
26 import tempfile
27 import os.path
28
29 from ganeti import constants
30 from ganeti import compat
31 from ganeti import utils
32
33 import qa_config
34 import qa_utils
35 import qa_error
36
37 from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
38
39
def _RemoveFileFromAllNodes(filename):
  """Deletes a file on every node listed in the QA configuration.

  Runs "rm -f" on the given path once per node, so a file that is already
  missing does not make "rm" itself report an error.

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
46
47
def _CheckFileOnAllNodes(filename, content):
  """Asserts that a file holds the expected content on every node.

  The file is read with "cat" via each node's "primary" entry and the
  command output is compared to the expected content.

  """
  read_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    actual = GetCommandOutput(target["primary"], read_cmd)
    AssertEqual(actual, content)
55
56
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs a RAPI users file granting "write" access to the given
  credentials and then runs "gnt-cluster init" with options taken from
  the QA configuration.

  """
  master = qa_config.GetMasterNode()
  users_file = constants.RAPI_USERS_FILE
  users_dir = os.path.dirname(users_file)

  # The RAPI credentials are put in place before the cluster is created
  credentials = tempfile.NamedTemporaryFile()
  try:
    credentials.write("%s %s write\n" % (rapi_user, rapi_secret))
    credentials.flush()

    uploaded = qa_utils.UploadFile(master["primary"], credentials.name)
    try:
      AssertCommand(["mkdir", "-p", users_dir])
      AssertCommand(["mv", uploaded, users_file])
    finally:
      # Only has something left to delete if the "mv" did not happen
      AssertCommand(["rm", "-f", uploaded])
  finally:
    credentials.close()

  # Gather the arguments for "gnt-cluster init"
  init_args = [
    "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4),
    ]

  secondary_ip = master.get("secondary", None)
  if secondary_ip:
    init_args.append("--secondary-ip=%s" % secondary_ip)

  # A configured bridge is also used as the master network device
  bridge = qa_config.get("bridge", None)
  if bridge:
    init_args.extend(["--bridge=%s" % bridge,
                      "--master-netdev=%s" % bridge])

  hypervisors = qa_config.get("enabled-hypervisors", None)
  if hypervisors:
    init_args.append("--enabled-hypervisors=%s" % hypervisors)

  cluster_name = qa_config.get("name")

  AssertCommand(["gnt-cluster", "init"] + init_args + [cluster_name])
99
100
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the configured "rename" target and back to its
  configured name, running "gnt-cluster verify" after each rename.  If no
  "rename" entry is configured, a message is printed and nothing is run.

  """
  original_name = qa_config.get("name")
  new_name = qa_config.get("rename", None)
  if new_name is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  rename = ["gnt-cluster", "rename", "-f"]
  verify = ["gnt-cluster", "verify"]

  # There ...
  AssertCommand(rename + [new_name])
  AssertCommand(verify)

  # ... and back again
  AssertCommand(rename + [original_name])
  AssertCommand(verify)
120
121
def TestClusterOob():
  """out-of-band framework

  Checks that "gnt-cluster verify" fails while the "oob_program" node
  parameter points to a non-existing file or to a file with mode 0400,
  and that it succeeds once the file has mode 0500.

  The "oob_program" parameter is reset in a C{finally} clause, so a
  failing step does not leave the cluster configured with a bogus
  out-of-band program.

  """
  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

  # Baseline: the cluster has to verify cleanly before anything is changed
  AssertCommand(["gnt-cluster", "verify"])

  try:
    # A program that does not exist must make verification fail
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program=/tmp/ganeti-qa-oob-does-not-exist-%s" %
                   utils.NewUUID()])

    AssertCommand(["gnt-cluster", "verify"], fail=True)

    # Create an existing but non-executable file and distribute it
    AssertCommand(["touch", oob_path_exists])
    AssertCommand(["chmod", "0400", oob_path_exists])
    AssertCommand(["gnt-cluster", "copyfile", oob_path_exists])

    try:
      AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                     "oob_program=%s" % oob_path_exists])

      # Mode 0400: verification is still expected to fail
      AssertCommand(["gnt-cluster", "verify"], fail=True)

      # Mode 0500: verification is expected to pass
      AssertCommand(["chmod", "0500", oob_path_exists])
      AssertCommand(["gnt-cluster", "copyfile", oob_path_exists])

      AssertCommand(["gnt-cluster", "verify"])
    finally:
      AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])
  finally:
    # Always restore the empty setting, even if a step above failed
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program="])
152
153
def TestClusterEpo():
  """gnt-cluster epo

  Requires every node to report its "powered" field as "(unavail)",
  checks that invalid "gnt-cluster epo" invocations are rejected and then
  runs "epo" and "epo --on" for the whole cluster, checking the reported
  instance status after each of them.

  """
  master = qa_config.GetMasterNode()
  primary = master["primary"]
  epo = ["gnt-cluster", "epo"]
  instance_status = "gnt-instance list --no-header -o status"

  def _AssertEveryLine(query, expected):
    """Asserts that each line printed by the query equals C{expected}.

    """
    lines = GetCommandOutput(primary, query).splitlines()
    AssertEqual(compat.all([line == expected for line in lines]), True)

  # OOB has to be unavailable on all nodes
  _AssertEveryLine("gnt-node list --verbose --no-header -o powered",
                   "(unavail)")

  # "--groups" and "--all" conflict with each other
  AssertCommand(epo + ["--groups", "--all"], fail=True)
  # No further arguments are accepted together with "--all"
  AssertCommand(epo + ["--all", "some_arg"], fail=True)

  # The master may only be part of the operation if "--all" is given
  AssertCommand(epo + ["-f", primary], fail=True)

  # The regular invocation has to succeed ...
  AssertCommand(epo + ["-f", "--all"])

  # ... and leave all instances stopped
  _AssertEveryLine(instance_status, "ADMIN_down")

  # Bring everything back up
  AssertCommand(epo + ["--on", "-f", "--all"])

  # All instances should be running again
  _AssertEveryLine(instance_status, "running")
190
191
def TestClusterVerify():
  """gnt-cluster verify

  Runs "gnt-cluster verify" followed by "gnt-cluster verify-disks".

  """
  for subcommand in ("verify", "verify-disks"):
    AssertCommand(["gnt-cluster", subcommand])
196
197
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the job queue self-test provided by "gnt-debug".

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
201
202
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates the logical volume "xenvg/qa-test" and checks that
  "gnt-cluster verify" only succeeds while the "--reserved-lvs" setting
  matches that volume.

  """
  verify = ["gnt-cluster", "verify"]
  set_reserved = ["gnt-cluster", "modify", "--reserved-lvs"]

  # Each step is a command and whether that command is expected to fail
  steps = [
    (verify, False),
    # Start without any reserved volumes
    (set_reserved + [""], False),
    # A volume that is not reserved makes verify fail
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    (verify, True),
    # Reserved by its exact name
    (set_reserved + ["xenvg/qa-test,.*/other-test"], False),
    (verify, False),
    # Reserved through a pattern
    (set_reserved + [".*/qa-.*"], False),
    (verify, False),
    # Dropping the reservation makes verify fail again
    (set_reserved + [""], False),
    (verify, True),
    # Clean up the volume
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (verify, False),
    ]

  for (step_cmd, must_fail) in steps:
    AssertCommand(step_cmd, fail=must_fail)
222
223
def TestClusterModifyBe():
  """gnt-cluster modify -B

  For each of the backend parameters "memory", "vcpus" and "auto_balance"
  this sets a first value, confirms it in the "gnt-cluster info" output,
  checks that an invalid value is rejected and finally sets and confirms
  a second value.

  """
  def _Modify(name, value, must_fail):
    """Runs "gnt-cluster modify -B name=value".

    """
    AssertCommand(["gnt-cluster", "modify", "-B", "%s=%s" % (name, value)],
                  fail=must_fail)

  def _Confirm(name, value):
    """Greps the "gnt-cluster info" output for "name: value".

    """
    pattern = "gnt-cluster info|grep '^ *%s: %s$'" % (name, value)
    AssertCommand(["sh", "-c", pattern], fail=False)

  # (parameter, first value, invalid value, second value)
  scenarios = [
    ("memory", "256", "a", "128"),
    ("vcpus", "4", "a", "1"),
    ("auto_balance", "False", "1", "True"),
    ]

  for (name, first, invalid, second) in scenarios:
    _Modify(name, first, False)
    _Confirm(name, first)
    _Modify(name, invalid, True)
    _Modify(name, second, False)
    _Confirm(name, second)
247
248
def TestClusterInfo():
  """gnt-cluster info

  Runs the command and only asserts on its result; the output is not
  inspected here.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
252
253
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command and asserts on its result.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
257
258
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command and asserts on its result; the reported master name
  is not compared against the QA configuration.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
262
263
def TestClusterVersion():
  """gnt-cluster version

  Runs the command and asserts on its result; the printed version is not
  inspected.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
267
268
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Covers, in this order: conflicting option combinations (expected to
  fail), an invalid RAPI certificate (expected to fail), a custom RAPI
  certificate, a custom cluster domain secret, renewal of all
  certificates and secrets, and finally the restoration of the RAPI
  certificate that was backed up at the beginning.

  The local temporary files are closed as soon as they have been uploaded
  to the master node.

  """
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd+i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         constants.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
    finally:
      # The local copy is no longer needed once it has been uploaded
      fh.close()

    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--rapi-certificate=%s" % tmpcert])
    finally:
      AssertCommand(["rm", "-f", tmpcert])

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
    finally:
      cds_fh.close()

    try:
      AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                     "--cluster-domain-secret=%s" % tmpcds])
    finally:
      AssertCommand(["rm", "-f", tmpcds])

    # Normal case
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    # The backup copy is removed whether or not the restore succeeded
    AssertCommand(["rm", "-f", rapi_cert_backup])
329
330
def TestClusterBurnin():
  """Burnin

  Uploads the burnin tool to the master node and runs it against as many
  instances as could be acquired, up to the configured
  "burnin-instances" count.  At least one instance is required.  The
  acquired instances are released again in all cases.

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)
  wanted = options.get("burnin-instances", 1)

  acquired = []
  try:
    # Take as many instances as requested; fewer are tolerated
    try:
      for _ in range(wanted):
        acquired.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if not acquired:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      burnin_cmd = [
        script,
        "--os=%s" % qa_config.get("os"),
        "--disk-size=%s" % ",".join(qa_config.get("disk")),
        "--disk-growth=%s" % ",".join(qa_config.get("disk-growth")),
        "--disk-template=%s" % disk_template,
        ]

      if parallel:
        burnin_cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        burnin_cmd.append("--http-check")
      if do_rename:
        burnin_cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        burnin_cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        burnin_cmd.append("--no-reboot")

      # The instance names are passed as positional arguments
      burnin_cmd.extend([inst["name"] for inst in acquired])

      AssertCommand(burnin_cmd)
    finally:
      # Remove the uploaded copy of the tool
      AssertCommand(["rm", "-f", script])

  finally:
    for inst in acquired:
      qa_config.ReleaseInstance(inst)
383
384
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs the failover on a second node and then on the original master
  node again.  The second node is released afterwards in all cases.

  """
  master = qa_config.GetMasterNode()
  standby = qa_config.AcquireNode(exclude=master)

  try:
    # First to the other node, then back to the original master node
    for target in (standby, master):
      AssertCommand(["gnt-cluster", "master-failover"], node=target)
  finally:
    qa_config.ReleaseNode(standby)
397
398
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on the failover candidate, runs the
  master failover on that node and checks that the drain file is gone
  afterwards, both where commands run by default and on the candidate.
  The master role is then moved back to the original master node.

  The candidate node is released in all cases, including a failure of
  the initial checks or of creating the drain file.

  """
  drain_check = ["test", "-f", constants.JOB_QUEUE_DRAIN_FILE]
  cmd = ["gnt-cluster", "master-failover"]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  try:
    # Ensure queue is not drained
    for node in [master, failovermaster]:
      AssertCommand(drain_check, node=node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", constants.JOB_QUEUE_DRAIN_FILE],
                  node=failovermaster)

    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)

    # The drain file must no longer exist after the failover
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)

  # Moving the master role back must not bring the drain file back either
  AssertCommand(drain_check, fail=True)
  AssertCommand(drain_check, node=failovermaster, fail=True)
427
428
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file containing a fresh UUID to the master node, distributes
  it with "gnt-cluster copyfile" and checks its content on all nodes.
  The file is removed from all nodes afterwards in all cases.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    f.flush()
    f.seek(0)

    # Upload file to master node
    testname = qa_utils.UploadFile(master['primary'], f.name)
  finally:
    # The local copy is not needed any more once it has been uploaded
    f.close()

  try:
    # Copy file to all nodes
    AssertCommand(["gnt-cluster", "copyfile", testname])
    _CheckFileOnAllNodes(testname, uniqueid)
  finally:
    _RemoveFileFromAllNodes(testname)
449
450
def TestClusterCommand():
  """gnt-cluster command

  Uses "gnt-cluster command" to write a fresh UUID into a file below
  /tmp and checks that the file holds that UUID on all nodes.  The file
  is removed from all nodes afterwards in all cases.

  """
  expected = utils.NewUUID()
  remote_path = "/tmp/gnt%s" % utils.NewUUID()

  # The redirection is part of the command line given to
  # "gnt-cluster command", not of the local invocation
  echo = utils.ShellQuoteArgs(["echo", "-n", expected])
  remote_line = "%s >%s" % (echo, remote_path)
  full_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_line])

  try:
    AssertCommand(full_cmd)
    _CheckFileOnAllNodes(remote_path, expected)
  finally:
    _RemoveFileFromAllNodes(remote_path)
464
465
def TestClusterDestroy():
  """gnt-cluster destroy

  Passes "--yes-do-it" on the command line.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
469
470
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command and asserts on its result.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)