root / qa / qa_cluster.py @ 3e0ed18c
History | View | Annotate | Download (15.2 kB)
1 |
#
|
---|---|
2 |
#
|
3 |
|
4 |
# Copyright (C) 2007, 2010, 2011 Google Inc.
|
5 |
#
|
6 |
# This program is free software; you can redistribute it and/or modify
|
7 |
# it under the terms of the GNU General Public License as published by
|
8 |
# the Free Software Foundation; either version 2 of the License, or
|
9 |
# (at your option) any later version.
|
10 |
#
|
11 |
# This program is distributed in the hope that it will be useful, but
|
12 |
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
# General Public License for more details.
|
15 |
#
|
16 |
# You should have received a copy of the GNU General Public License
|
17 |
# along with this program; if not, write to the Free Software
|
18 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
19 |
# 02110-1301, USA.
|
20 |
|
21 |
|
22 |
"""Cluster related QA tests.
|
23 |
|
24 |
"""
|
25 |
|
26 |
import tempfile |
27 |
import os.path |
28 |
|
29 |
from ganeti import constants |
30 |
from ganeti import compat |
31 |
from ganeti import utils |
32 |
|
33 |
import qa_config |
34 |
import qa_utils |
35 |
import qa_error |
36 |
|
37 |
from qa_utils import AssertEqual, AssertCommand, GetCommandOutput |
38 |
|
39 |
|
40 |
#: cluster verify command
|
41 |
_CLUSTER_VERIFY = ["gnt-cluster", "verify"] |
42 |
|
43 |
|
44 |
def _RemoveFileFromAllNodes(filename):
    """Removes a file from all nodes.

    Runs C{rm -f} on C{filename} once for every entry of the "nodes" QA
    configuration value.

    @param filename: path of the file to delete on each node

    """
    remove_cmd = ["rm", "-f", filename]
    for target in qa_config.get("nodes"):
        AssertCommand(remove_cmd, node=target)
50 |
|
51 |
|
52 |
def _CheckFileOnAllNodes(filename, content):
    """Verifies the content of the given file on all nodes.

    The file is read with C{cat} on every configured node (addressed through
    its "primary" entry) and the command output must equal C{content}.

    @param filename: path of the file to read on each node
    @param content: exact output expected from C{cat}

    """
    cat_cmd = utils.ShellQuoteArgs(["cat", filename])
    for node in qa_config.get("nodes"):
        actual = qa_utils.GetCommandOutput(node["primary"], cat_cmd)
        AssertEqual(actual, content)
|
59 |
|
60 |
|
61 |
def TestClusterInit(rapi_user, rapi_secret):
    """gnt-cluster init

    Installs a RAPI users file holding one write-enabled user, initializes
    the cluster from the QA configuration and then applies the optional
    hypervisor, backend, OS and per-OS hypervisor parameters.

    @param rapi_user: user name written to the RAPI users file
    @param rapi_secret: secret written next to C{rapi_user}

    """
    master = qa_config.GetMasterNode()
    rapi_dir = os.path.dirname(constants.RAPI_USERS_FILE)

    # First create the RAPI credentials
    fh = tempfile.NamedTemporaryFile()
    try:
        fh.write("%s %s write\n" % (rapi_user, rapi_secret))
        fh.flush()

        tmpru = qa_utils.UploadFile(master["primary"], fh.name)
        try:
            AssertCommand(["mkdir", "-p", rapi_dir])
            AssertCommand(["mv", tmpru, constants.RAPI_USERS_FILE])
        finally:
            # Harmless after a successful "mv"; cleans up if it failed
            AssertCommand(["rm", "-f", tmpru])
    finally:
        fh.close()

    # Initialize cluster
    init_cmd = [
        "gnt-cluster",
        "init",
        "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4),
    ]

    secondary = master.get("secondary", None)
    if secondary:
        init_cmd.append("--secondary-ip=%s" % secondary)

    bridge = qa_config.get("bridge", None)
    if bridge:
        # The configured bridge is also used as the master network device
        init_cmd.extend([
            "--bridge=%s" % bridge,
            "--master-netdev=%s" % bridge,
        ])

    htype = qa_config.get("enabled-hypervisors", None)
    if htype:
        init_cmd.append("--enabled-hypervisors=%s" % htype)

    init_cmd.append(qa_config.get("name"))
    AssertCommand(init_cmd)

    # Collect the "gnt-cluster modify" options; the command is only run
    # when there is at least one of them
    modify_args = []

    # hypervisor parameter modifications
    hv_settings = qa_config.get("hypervisor-parameters", {})
    for hv_name, hv_params in hv_settings.items():
        modify_args += ["-H", "%s:%s" % (hv_name, hv_params)]

    # backend parameter modifications
    bep = qa_config.get("backend-parameters", "")
    if bep:
        modify_args += ["-B", bep]

    if modify_args:
        AssertCommand(["gnt-cluster", "modify"] + modify_args)

    # OS parameters
    os_settings = qa_config.get("os-parameters", {})
    for os_name, os_params in os_settings.items():
        AssertCommand(["gnt-os", "modify", "-O", os_params, os_name])

    # OS hypervisor parameters
    os_hvp = qa_config.get("os-hvp", {})
    for os_name, per_hypervisor in os_hvp.items():
        for hv_name, hv_params in per_hypervisor.items():
            AssertCommand(["gnt-os", "modify",
                           "-H", "%s:%s" % (hv_name, hv_params),
                           os_name])
127 |
|
128 |
|
129 |
def TestClusterRename():
    """gnt-cluster rename

    Renames the cluster to the configured "rename" target and back to its
    original name, running cluster verify after each rename. Without a
    "rename" entry an error message is printed and nothing is run.

    """
    original_name = qa_config.get("name")
    rename_target = qa_config.get("rename", None)
    if rename_target is None:
        # Parenthesised single argument: prints the same text whether
        # "print" is a statement or a function
        print(qa_utils.FormatError('"rename" entry is missing'))
        return

    rename = ["gnt-cluster", "rename", "-f"]

    AssertCommand(rename + [rename_target])
    AssertCommand(_CLUSTER_VERIFY)

    # ... and back to the name the cluster started with
    AssertCommand(rename + [original_name])
    AssertCommand(_CLUSTER_VERIFY)
146 |
|
147 |
|
148 |
def TestClusterOob():
    """out-of-band framework

    Checks how cluster verify reacts to the "oob_program" node parameter:
    it must fail for a missing program and for one with mode 0400, and
    pass once the program has mode 0500. The parameter is cleared again at
    the end.

    """
    oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

    set_node_params = ["gnt-cluster", "modify", "--node-parameters"]
    distribute = ["gnt-cluster", "copyfile", oob_path_exists]

    AssertCommand(_CLUSTER_VERIFY)

    # A program that does not exist must make verify fail
    AssertCommand(set_node_params + [
        "oob_program=/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID(),
    ])
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # Create the program with mode 0400 and distribute it
    AssertCommand(["touch", oob_path_exists])
    AssertCommand(["chmod", "0400", oob_path_exists])
    AssertCommand(distribute)

    try:
        AssertCommand(set_node_params + ["oob_program=%s" % oob_path_exists])

        # The file exists now, but with mode 0400 verify must still fail
        AssertCommand(_CLUSTER_VERIFY, fail=True)

        # Switch to mode 0500 and redistribute; verify must pass now
        AssertCommand(["chmod", "0500", oob_path_exists])
        AssertCommand(distribute)

        AssertCommand(_CLUSTER_VERIFY)
    finally:
        AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])

    # Reset the parameter
    AssertCommand(set_node_params + ["oob_program="])
|
178 |
|
179 |
|
180 |
def TestClusterEpo():
    """gnt-cluster epo

    Checks argument validation of "gnt-cluster epo", then runs it for the
    whole cluster and back on again, checking the reported instance status
    after each step.

    """
    master = qa_config.GetMasterNode()
    epo = ["gnt-cluster", "epo"]
    list_status = "gnt-instance list --no-headers -o status"

    # Assert that OOB is unavailable for all nodes
    powered_lines = GetCommandOutput(
        master["primary"],
        "gnt-node list --verbose --no-headers -o powered").splitlines()
    AssertEqual(compat.all(line == "(unavail)" for line in powered_lines),
                True)

    rejected = [
        # Conflicting
        ["--groups", "--all"],
        # --all doesn't expect arguments
        ["--all", "some_arg"],
        # Unless --all is given master is not allowed to be in the list
        ["-f", master["primary"]],
    ]
    for bad_args in rejected:
        AssertCommand(epo + bad_args, fail=True)

    # This shouldn't fail
    AssertCommand(epo + ["-f", "--all"])

    # All instances should have been stopped now
    stopped = GetCommandOutput(master["primary"], list_status).splitlines()
    # ERROR_down because the instance is stopped but not recorded as such
    AssertEqual(compat.all(state == "ERROR_down" for state in stopped),
                True)

    # Now start everything again
    AssertCommand(epo + ["--on", "-f", "--all"])

    # All instances should have been started now
    started = GetCommandOutput(master["primary"], list_status).splitlines()
    AssertEqual(compat.all(state == "running" for state in started), True)
217 |
|
218 |
|
219 |
def TestClusterVerify():
    """gnt-cluster verify

    Runs cluster verify followed by "gnt-cluster verify-disks".

    """
    for cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
        AssertCommand(cmd)
223 |
|
224 |
|
225 |
def TestJobqueue():
    """gnt-debug test-jobqueue

    Runs the command once and asserts through AssertCommand.

    """
    jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
    AssertCommand(jobqueue_cmd)
228 |
|
229 |
|
230 |
def TestClusterReservedLvs():
    """gnt-cluster reserved lvs

    Creates the logical volume "xenvg/qa-test" and checks that cluster
    verify fails while the volume is not covered by the reserved-lvs
    setting and passes while it is. The volume is removed at the end.

    """
    reserve = ["gnt-cluster", "modify", "--reserved-lvs"]

    # Baseline: a clean cluster with an empty reserved list verifies
    AssertCommand(_CLUSTER_VERIFY)
    AssertCommand(reserve + [""])

    # With the volume present and nothing reserved, verify must fail
    AssertCommand(["lvcreate", "-L1G", "-nqa-test", "xenvg"])
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # Reserved by exact name (plus an unrelated pattern)
    AssertCommand(reserve + ["xenvg/qa-test,.*/other-test"])
    AssertCommand(_CLUSTER_VERIFY)

    # Reserved by pattern
    AssertCommand(reserve + [".*/qa-.*"])
    AssertCommand(_CLUSTER_VERIFY)

    # Clearing the reserved list makes verify fail again
    AssertCommand(reserve + [""])
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # Remove the volume; verify passes again
    AssertCommand(["lvremove", "-f", "xenvg/qa-test"])
    AssertCommand(_CLUSTER_VERIFY)
248 |
|
249 |
|
250 |
def TestClusterModifyBe():
    """gnt-cluster modify -B

    For each of the backend parameters memory, vcpus and auto_balance:
    sets a valid value and checks it in the "gnt-cluster info" output,
    checks that an invalid value is rejected, then sets and checks a second
    valid value. Finally the backend parameters from the QA configuration,
    if any, are applied again.

    """
    modify = ["gnt-cluster", "modify", "-B"]
    # Shell pipeline looking for one "<name>: <value>" line in the output
    info_grep = "gnt-cluster info|grep '^ *%s: %s$'"

    # (parameter, first valid value, rejected value, final valid value)
    cases = [
        ("memory", "256", "a", "128"),
        ("vcpus", "4", "a", "1"),
        ("auto_balance", "False", "1", "True"),
    ]

    for name, first, rejected, final in cases:
        AssertCommand(modify + ["%s=%s" % (name, first)])
        AssertCommand(["sh", "-c", info_grep % (name, first)])

        AssertCommand(modify + ["%s=%s" % (name, rejected)], fail=True)

        AssertCommand(modify + ["%s=%s" % (name, final)])
        AssertCommand(["sh", "-c", info_grep % (name, final)])

    # redo the original-requested BE parameters, if any
    bep = qa_config.get("backend-parameters", "")
    if bep:
        AssertCommand(modify + [bep])
278 |
|
279 |
|
280 |
def TestClusterInfo():
    """gnt-cluster info

    Runs the command once and asserts through AssertCommand.

    """
    info_cmd = ["gnt-cluster", "info"]
    AssertCommand(info_cmd)
283 |
|
284 |
|
285 |
def TestClusterRedistConf():
    """gnt-cluster redist-conf

    Runs the command once and asserts through AssertCommand.

    """
    redist_cmd = ["gnt-cluster", "redist-conf"]
    AssertCommand(redist_cmd)
288 |
|
289 |
|
290 |
def TestClusterGetmaster():
    """gnt-cluster getmaster

    Runs the command once and asserts through AssertCommand.

    """
    getmaster_cmd = ["gnt-cluster", "getmaster"]
    AssertCommand(getmaster_cmd)
293 |
|
294 |
|
295 |
def TestClusterVersion():
    """gnt-cluster version

    Runs the command once and asserts through AssertCommand.

    """
    version_cmd = ["gnt-cluster", "version"]
    AssertCommand(version_cmd)
298 |
|
299 |
|
300 |
def TestClusterRenewCrypto():
    """gnt-cluster renew-crypto

    Checks that conflicting options and an invalid RAPI certificate are
    rejected, then renews with a custom RAPI certificate, with a custom
    cluster domain secret and with all "--new-*" options. The RAPI
    certificate backed up at the start is restored at the end, and the
    backup file is always removed.

    Local temporary files are closed as soon as they have been uploaded,
    following the try/finally pattern TestClusterInit uses, instead of
    being left open until they are garbage collected.

    """
    master = qa_config.GetMasterNode()
    renew = ["gnt-cluster", "renew-crypto", "--force"]

    # Conflicting options
    cmd = renew + ["--new-cluster-certificate", "--new-confd-hmac-key"]
    conflicting = [
        ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
        ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
    for i in conflicting:
        AssertCommand(cmd + i, fail=True)

    # Invalid RAPI certificate
    AssertCommand(renew + ["--rapi-certificate=/dev/null"], fail=True)

    rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                           constants.RAPI_CERT_FILE)
    try:
        # Custom RAPI certificate
        fh = tempfile.NamedTemporaryFile()
        try:
            # Ensure certificate doesn't cause "gnt-cluster verify" to
            # complain
            validity = constants.SSL_CERT_EXPIRATION_WARN * 3

            utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

            tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
        finally:
            # Only the uploaded copy is used from here on
            fh.close()

        try:
            AssertCommand(renew + ["--rapi-certificate=%s" % tmpcert])
        finally:
            AssertCommand(["rm", "-f", tmpcert])

        # Custom cluster domain secret
        cds_fh = tempfile.NamedTemporaryFile()
        try:
            cds_fh.write(utils.GenerateSecret())
            cds_fh.write("\n")
            cds_fh.flush()

            tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
        finally:
            cds_fh.close()

        try:
            AssertCommand(renew + ["--cluster-domain-secret=%s" % tmpcds])
        finally:
            AssertCommand(["rm", "-f", tmpcds])

        # Normal case
        AssertCommand(renew + [
            "--new-cluster-certificate", "--new-confd-hmac-key",
            "--new-rapi-certificate", "--new-cluster-domain-secret",
        ])

        # Restore RAPI certificate
        AssertCommand(renew + ["--rapi-certificate=%s" % rapi_cert_backup])
    finally:
        AssertCommand(["rm", "-f", rapi_cert_backup])
360 |
|
361 |
|
362 |
def TestClusterBurnin():
    """Burnin

    Acquires up to "burnin-instances" QA instances (default 1), uploads
    "../tools/burnin" to the master node and runs it with arguments built
    from the QA configuration. The uploaded script is removed and the
    acquired instances are released afterwards, also on failure.

    @raise qa_error.Error: if no instance at all could be acquired

    """
    master = qa_config.GetMasterNode()

    options = qa_config.get("options", {})
    disk_template = options.get("burnin-disk-template", "drbd")
    parallel = options.get("burnin-in-parallel", False)
    check_inst = options.get("burnin-check-instances", False)
    do_rename = options.get("burnin-rename", "")
    do_reboot = options.get("burnin-reboot", True)
    reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

    # Get as many instances as we need
    instances = []
    try:
        try:
            wanted = qa_config.get("options", {}).get("burnin-instances", 1)
            for _ in range(wanted):
                instances.append(qa_config.AcquireInstance())
        except qa_error.OutOfInstancesError:
            # Fewer instances than requested is acceptable; none is not
            print("Not enough instances, continuing anyway.")

        if not instances:
            raise qa_error.Error("Burnin needs at least one instance")

        script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
        try:
            # Run burnin
            burnin_cmd = [
                script,
                "--os=%s" % qa_config.get("os"),
                "--disk-size=%s" % ",".join(qa_config.get("disk")),
                "--disk-growth=%s" % ",".join(qa_config.get("disk-growth")),
                "--disk-template=%s" % disk_template,
            ]
            if parallel:
                burnin_cmd.extend(["--parallel", "--early-release"])
            if check_inst:
                burnin_cmd.append("--http-check")
            if do_rename:
                burnin_cmd.append("--rename=%s" % do_rename)
            if do_reboot:
                burnin_cmd.append("--reboot-types=%s" %
                                  ",".join(reboot_types))
            else:
                burnin_cmd.append("--no-reboot")
            burnin_cmd.extend([inst["name"] for inst in instances])
            AssertCommand(burnin_cmd)
        finally:
            AssertCommand(["rm", "-f", script])

    finally:
        for inst in instances:
            qa_config.ReleaseInstance(inst)
414 |
|
415 |
|
416 |
def TestClusterMasterFailover():
    """gnt-cluster master-failover

    Acquires a second node, runs master-failover on it and then on the
    original master to switch back. The acquired node is always released.

    """
    master = qa_config.GetMasterNode()
    failovermaster = qa_config.AcquireNode(exclude=master)

    failover_cmd = ["gnt-cluster", "master-failover"]
    try:
        # First to the other node, then back to original master node
        for target in (failovermaster, master):
            AssertCommand(failover_cmd, node=target)
    finally:
        qa_config.ReleaseNode(failovermaster)
428 |
|
429 |
|
430 |
def TestClusterMasterFailoverWithDrainedQueue():
    """gnt-cluster master-failover with drained queue

    Creates the job queue drain file on a second node, fails the master
    role over to that node and checks that the drain file is gone
    afterwards, both on the node AssertCommand uses by default and on the
    failover node. The master role is then moved back and the same checks
    are repeated.

    The precondition checks and the creation of the drain file run inside
    the try block, so the acquired node is released even when one of them
    fails.

    """
    # "test -f" succeeds only while the drain file exists
    drain_check = ["test", "-f", constants.JOB_QUEUE_DRAIN_FILE]

    master = qa_config.GetMasterNode()
    failovermaster = qa_config.AcquireNode(exclude=master)

    cmd = ["gnt-cluster", "master-failover"]
    try:
        # Ensure queue is not drained
        for node in [master, failovermaster]:
            AssertCommand(drain_check, node=node, fail=True)

        # Drain queue on failover master
        AssertCommand(["touch", constants.JOB_QUEUE_DRAIN_FILE],
                      node=failovermaster)

        AssertCommand(drain_check, node=failovermaster)
        AssertCommand(cmd, node=failovermaster)
        AssertCommand(drain_check, fail=True)
        AssertCommand(drain_check, node=failovermaster, fail=True)

        # Back to original master node
        AssertCommand(cmd, node=master)
    finally:
        qa_config.ReleaseNode(failovermaster)

    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)
|
458 |
|
459 |
|
460 |
def TestClusterCopyfile():
    """gnt-cluster copyfile

    Uploads a file containing a fresh UUID to the master node, distributes
    it with "gnt-cluster copyfile" and checks that every node holds the
    same content. The file is removed from all nodes afterwards.

    The local temporary file is closed as soon as it has been uploaded,
    following the try/finally pattern TestClusterInit uses, instead of
    being left open until it is garbage collected.

    """
    master = qa_config.GetMasterNode()

    uniqueid = utils.NewUUID()

    # Create temporary file
    f = tempfile.NamedTemporaryFile()
    try:
        f.write(uniqueid)
        f.flush()
        f.seek(0)

        # Upload file to master node
        testname = qa_utils.UploadFile(master["primary"], f.name)
    finally:
        # Only the uploaded copy is used from here on
        f.close()

    try:
        # Copy file to all nodes
        AssertCommand(["gnt-cluster", "copyfile", testname])
        _CheckFileOnAllNodes(testname, uniqueid)
    finally:
        _RemoveFileFromAllNodes(testname)
480 |
|
481 |
|
482 |
def TestClusterCommand():
    """gnt-cluster command

    Uses "gnt-cluster command" to echo a fresh UUID into a uniquely named
    file under /tmp, then checks that every node holds exactly that
    content. The file is removed from all nodes afterwards.

    """
    uniqueid = utils.NewUUID()
    rfile = "/tmp/gnt%s" % utils.NewUUID()

    # The echo is quoted on its own; the redirection is appended to it and
    # the result is passed as one argument of "gnt-cluster command"
    echo_part = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
    remote_line = "%s >%s" % (echo_part, rfile)
    cluster_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command",
                                        remote_line])

    try:
        AssertCommand(cluster_cmd)
        _CheckFileOnAllNodes(rfile, uniqueid)
    finally:
        _RemoveFileFromAllNodes(rfile)
495 |
|
496 |
|
497 |
def TestClusterDestroy():
    """gnt-cluster destroy

    Runs the command with "--yes-do-it" and asserts through AssertCommand.

    """
    destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
    AssertCommand(destroy_cmd)
500 |
|
501 |
|
502 |
def TestClusterRepairDiskSizes():
    """gnt-cluster repair-disk-sizes

    Runs the command once and asserts through AssertCommand.

    """
    repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
    AssertCommand(repair_cmd)