gnt-cluster master-failover: Undrain queue
[ganeti-local] / qa / qa_cluster.py
1 #
2 #
3
4 # Copyright (C) 2007, 2010, 2011 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Cluster related QA tests.
23
24 """
25
26 import tempfile
27 import os.path
28
29 from ganeti import constants
30 from ganeti import compat
31 from ganeti import utils
32
33 import qa_config
34 import qa_utils
35 import qa_error
36
37 from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
38
39
def _RemoveFileFromAllNodes(filename):
  """Deletes the given file on every node of the QA configuration.

  The removal is done with C{rm -f} and asserted on each node in turn.

  @param filename: path of the file to remove on each node

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
46
47
def _CheckFileOnAllNodes(filename, content):
  """Asserts that a file has the expected content on every node.

  The file is read with C{cat} via each node's "primary" entry and the
  command output is compared against the expected content.

  @param filename: path of the file to read on each node
  @param content: exact output expected from C{cat}

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    output = qa_utils.GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(output, content)
55
56
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs a RAPI users file containing a single user with write access on
  the master node and then initializes the cluster using the settings found
  in the QA configuration.

  @param rapi_user: name of the RAPI user to create
  @param rapi_secret: password of the RAPI user

  """
  master = qa_config.GetMasterNode()

  users_file = constants.RAPI_USERS_FILE
  users_dir = os.path.dirname(users_file)

  # The RAPI credentials are created first
  credentials = tempfile.NamedTemporaryFile()
  try:
    credentials.write("%s %s write\n" % (rapi_user, rapi_secret))
    credentials.flush()

    uploaded = qa_utils.UploadFile(master["primary"], credentials.name)
    try:
      AssertCommand(["mkdir", "-p", users_dir])
      AssertCommand(["mv", uploaded, users_file])
    finally:
      # Harmless after a successful "mv"; cleans up if an earlier step failed
      AssertCommand(["rm", "-f", uploaded])
  finally:
    credentials.close()

  # Assemble the initialization command; optional settings are only passed
  # when the QA configuration provides them
  init_cmd = [
    'gnt-cluster',
    'init',
    "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4),
    ]

  secondary_ip = master.get('secondary', None)
  if secondary_ip:
    init_cmd.append('--secondary-ip=%s' % secondary_ip)

  bridge = qa_config.get('bridge', None)
  if bridge:
    # The bridge doubles as the master network device
    init_cmd.extend([
      '--bridge=%s' % bridge,
      '--master-netdev=%s' % bridge,
      ])

  hypervisors = qa_config.get('enabled-hypervisors', None)
  if hypervisors:
    init_cmd.append('--enabled-hypervisors=%s' % hypervisors)

  init_cmd.append(qa_config.get('name'))

  AssertCommand(init_cmd)
99
100
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the configured "rename" target and back to its
  original name, running "gnt-cluster verify" after each rename. If no
  "rename" entry is configured, an error message is printed and nothing
  else is done.

  """
  original_name = qa_config.get('name')
  rename_target = qa_config.get('rename', None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  # Rename to the target first, then restore the original name
  for new_name in (rename_target, original_name):
    AssertCommand(['gnt-cluster', 'rename', '-f', new_name])
    AssertCommand(['gnt-cluster', 'verify'])
120
121
def TestClusterOob():
  """out-of-band framework

  Checks how "gnt-cluster verify" reacts to the cluster-wide "oob_program"
  node parameter: verification is expected to fail for a non-existing
  program and for an existing file with mode 0400, and to succeed once the
  file has mode 0500. The parameter is reset to an empty value at the end.

  """
  existing_path = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

  verify_cmd = ["gnt-cluster", "verify"]
  distribute_cmd = ["gnt-cluster", "copyfile", existing_path]

  def _SetOobProgram(path):
    """Sets the "oob_program" node parameter at cluster level."""
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program=%s" % path])

  AssertCommand(verify_cmd)

  # Point the parameter at a program which does not exist
  _SetOobProgram("/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID())
  AssertCommand(verify_cmd, fail=True)

  # Create the file with mode 0400 and copy it to the nodes
  AssertCommand(["touch", existing_path])
  AssertCommand(["chmod", "0400", existing_path])
  AssertCommand(distribute_cmd)

  try:
    _SetOobProgram(existing_path)

    # With mode 0400 verification is still expected to fail
    AssertCommand(verify_cmd, fail=True)

    # Switch to mode 0500 and distribute the file again
    AssertCommand(["chmod", "0500", existing_path])
    AssertCommand(distribute_cmd)

    AssertCommand(verify_cmd)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", existing_path])

  # Reset the parameter to an empty value
  _SetOobProgram("")
152
153
def TestClusterEpo():
  """gnt-cluster epo

  Checks option validation of "gnt-cluster epo", then powers everything
  off and on again while verifying the reported instance status after each
  step.

  """
  master = qa_config.GetMasterNode()

  def _AssertEveryLine(list_cmd, expected):
    """Asserts that each output line of a command equals the given value."""
    lines = GetCommandOutput(master["primary"], list_cmd).splitlines()
    AssertEqual(compat.all(line == expected for line in lines), True)

  instance_status_cmd = "gnt-instance list --no-header -o status"

  # OOB must be unavailable for all nodes
  _AssertEveryLine("gnt-node list --verbose --no-header -o powered",
                   "(unavail)")

  # Invocations which are expected to fail
  for rejected_args in [
    # Conflicting
    ["--groups", "--all"],
    # --all doesn't expect arguments
    ["--all", "some_arg"],
    # Unless --all is given master is not allowed to be in the list
    ["-f", master["primary"]],
    ]:
    AssertCommand(["gnt-cluster", "epo"] + rejected_args, fail=True)

  # This shouldn't fail
  AssertCommand(["gnt-cluster", "epo", "-f", "--all"])

  # All instances should have been stopped now
  _AssertEveryLine(instance_status_cmd, "ADMIN_down")

  # Now start everything again
  AssertCommand(["gnt-cluster", "epo", "--on", "-f", "--all"])

  # All instances should have been started now
  _AssertEveryLine(instance_status_cmd, "running")
190
191
def TestClusterVerify():
  """gnt-cluster verify

  Runs "gnt-cluster verify" followed by "gnt-cluster verify-disks".

  """
  for subcommand in ("verify", "verify-disks"):
    AssertCommand(["gnt-cluster", subcommand])
196
197
def TestJobqueue():
  """gnt-debug test-jobqueue

  Asserts that the "gnt-debug test-jobqueue" command succeeds.

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
201
202
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates the logical volume "xenvg/qa-test" and checks that cluster
  verification is expected to fail unless the volume matches the
  configured reserved-lvs patterns. The volume is removed at the end.

  """
  verify = ["gnt-cluster", "verify"]

  def _SetReservedLvs(patterns):
    """Builds the command setting the reserved LV patterns."""
    return ["gnt-cluster", "modify", "--reserved-lvs", patterns]

  # Pairs of (command, whether the command is expected to fail)
  steps = [
    (verify, False),
    (_SetReservedLvs(""), False),
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    # The new volume is not covered by any reserved pattern yet
    (verify, True),
    (_SetReservedLvs("xenvg/qa-test,.*/other-test"), False),
    (verify, False),
    (_SetReservedLvs(".*/qa-.*"), False),
    (verify, False),
    (_SetReservedLvs(""), False),
    (verify, True),
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (verify, False),
    ]

  for command, expect_failure in steps:
    AssertCommand(command, fail=expect_failure)
222
223
def TestClusterModifyBe():
  """gnt-cluster modify -B

  For each of the backend parameters "memory", "vcpus" and "auto_balance":
  sets a value and checks that "gnt-cluster info" shows it, checks that an
  invalid value is rejected, then sets a final value and checks that one
  too.

  """
  def _Modify(name, value):
    """Builds the command setting one backend parameter."""
    return ["gnt-cluster", "modify", "-B", "%s=%s" % (name, value)]

  def _Shown(name, value):
    """Builds the command checking the value shown by "gnt-cluster info"."""
    return ["sh", "-c",
            "gnt-cluster info|grep '^ *%s: %s$'" % (name, value)]

  # (parameter, first value, invalid value, final value)
  for name, first, invalid, final in [
    ("memory", "256", "a", "128"),
    ("vcpus", "4", "a", "1"),
    ("auto_balance", "False", "1", "True"),
    ]:
    AssertCommand(_Modify(name, first), fail=False)
    AssertCommand(_Shown(name, first), fail=False)
    AssertCommand(_Modify(name, invalid), fail=True)
    AssertCommand(_Modify(name, final), fail=False)
    AssertCommand(_Shown(name, final), fail=False)
247
248
def TestClusterInfo():
  """gnt-cluster info

  Asserts that the "gnt-cluster info" command succeeds.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
252
253
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Asserts that the "gnt-cluster getmaster" command succeeds.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
257
258
def TestClusterVersion():
  """gnt-cluster version

  Asserts that the "gnt-cluster version" command succeeds.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
262
263
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then renews the crypto material using a custom RAPI
  certificate, a custom cluster domain secret and finally freshly
  generated values. The original RAPI certificate is restored from a backup
  at the end of a successful run.

  """
  master = qa_config.GetMasterNode()
  renew_cmd = ["gnt-cluster", "renew-crypto", "--force"]

  # Conflicting options
  cmd = renew_cmd + ["--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  AssertCommand(renew_cmd + ["--rapi-certificate=/dev/null"], fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         constants.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
      try:
        AssertCommand(renew_cmd + ["--rapi-certificate=%s" % tmpcert])
      finally:
        AssertCommand(["rm", "-f", tmpcert])
    finally:
      # Close the local temporary file explicitly instead of relying on
      # garbage collection
      fh.close()

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
      try:
        AssertCommand(renew_cmd + ["--cluster-domain-secret=%s" % tmpcds])
      finally:
        AssertCommand(["rm", "-f", tmpcds])
    finally:
      # The local file holds a secret; close it as soon as it is unused
      cds_fh.close()

    # Normal case
    AssertCommand(renew_cmd +
                  ["--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(renew_cmd + ["--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
324
325
def TestClusterBurnin():
  """Burnin

  Acquires up to "burnin-instances" instances (default 1), uploads the
  burnin tool to the master node and runs it with arguments built from the
  QA configuration. The uploaded script is removed and the instances are
  released afterwards.

  @raise qa_error.Error: if not even one instance could be acquired

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get('options', {})
  disk_template = options.get('burnin-disk-template', 'drbd')
  parallel = options.get('burnin-in-parallel', False)
  check_inst = options.get('burnin-check-instances', False)
  do_rename = options.get('burnin-rename', '')
  do_reboot = options.get('burnin-reboot', True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  instances = []
  try:
    try:
      wanted = qa_config.get('options', {}).get('burnin-instances', 1)
      for _ in range(wanted):
        instances.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      # Fewer instances than requested are acceptable
      print("Not enough instances, continuing anyway.")

    if not instances:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master['primary'], '../tools/burnin')
    try:
      # Run burnin
      burnin_cmd = [script]
      burnin_cmd.append('--os=%s' % qa_config.get('os'))
      burnin_cmd.append('--disk-size=%s' % ",".join(qa_config.get('disk')))
      burnin_cmd.append('--disk-growth=%s' %
                        ",".join(qa_config.get('disk-growth')))
      burnin_cmd.append('--disk-template=%s' % disk_template)

      if parallel:
        burnin_cmd.extend(['--parallel', '--early-release'])
      if check_inst:
        burnin_cmd.append('--http-check')
      if do_rename:
        burnin_cmd.append('--rename=%s' % do_rename)
      if do_reboot:
        burnin_cmd.append('--reboot-types=%s' % ",".join(reboot_types))
      else:
        burnin_cmd.append('--no-reboot')

      # The instance names are passed as positional arguments
      burnin_cmd.extend([inst['name'] for inst in instances])
      AssertCommand(burnin_cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for acquired in instances:
      qa_config.ReleaseInstance(acquired)
378
379
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs the failover command on a second node and then on the original
  master again. The second node is released afterwards in any case.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  try:
    # First to the acquired node, then back to original master node
    for target in (failovermaster, master):
      AssertCommand(["gnt-cluster", "master-failover"], node=target)
  finally:
    qa_config.ReleaseNode(failovermaster)
392
393
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on a second node, runs the failover
  command on that node and checks that the drain file is absent afterwards
  on both nodes. The same is checked again after failing back to the
  original master.

  All steps following the node acquisition run inside a single try/finally
  block, so the acquired node is released even if one of the initial checks
  fails.

  """
  drain_check = ["test", "-f", constants.JOB_QUEUE_DRAIN_FILE]
  cmd = ["gnt-cluster", "master-failover"]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)
  try:
    # Ensure queue is not drained
    for node in [master, failovermaster]:
      AssertCommand(drain_check, node=node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", constants.JOB_QUEUE_DRAIN_FILE],
                  node=failovermaster)
    AssertCommand(drain_check, node=failovermaster)

    AssertCommand(cmd, node=failovermaster)

    # The drain file must be gone on both nodes.
    # NOTE(review): without "node" the command presumably runs on the
    # configured master node -- confirm against qa_utils.AssertCommand
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)

    # The queue must still be undrained after failing back
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)
  finally:
    qa_config.ReleaseNode(failovermaster)
422
423
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file containing a freshly generated UUID to the master node,
  distributes it with "gnt-cluster copyfile" and verifies its content on
  all nodes. The file is removed from all nodes afterwards and the local
  temporary file is closed explicitly.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    # Flush so the content is on disk before the file is uploaded by name
    f.flush()

    # Upload file to master node
    testname = qa_utils.UploadFile(master['primary'], f.name)
    try:
      # Copy file to all nodes
      AssertCommand(["gnt-cluster", "copyfile", testname])
      _CheckFileOnAllNodes(testname, uniqueid)
    finally:
      _RemoveFileFromAllNodes(testname)
  finally:
    # Close the local temporary file instead of relying on garbage
    # collection
    f.close()
444
445
def TestClusterCommand():
  """gnt-cluster command

  Uses "gnt-cluster command" to write a freshly generated UUID to a file
  below /tmp and verifies the file's content on all nodes. The file is
  removed from all nodes afterwards.

  """
  marker = utils.NewUUID()
  remote_path = "/tmp/gnt%s" % utils.NewUUID()

  # Command passed to "gnt-cluster command": write the marker to the file
  echo_cmd = utils.ShellQuoteArgs(['echo', '-n', marker])
  redirected = "%s >%s" % (echo_cmd, remote_path)
  full_cmd = utils.ShellQuoteArgs(['gnt-cluster', 'command', redirected])

  try:
    AssertCommand(full_cmd)
    _CheckFileOnAllNodes(remote_path, marker)
  finally:
    _RemoveFileFromAllNodes(remote_path)
459
460
def TestClusterDestroy():
  """gnt-cluster destroy

  Asserts that "gnt-cluster destroy --yes-do-it" succeeds.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)