Revision cec9845c qa/ganeti-qa.py
b/qa/ganeti-qa.py | ||
---|---|---|
1 | 1 |
#!/usr/bin/python |
2 | 2 |
# |
3 | 3 |
|
4 |
# Copyright (C) 2006, 2007 Google Inc.
|
|
4 |
# Copyright (C) 2007 Google Inc. |
|
5 | 5 |
# |
6 | 6 |
# This program is free software; you can redistribute it and/or modify |
7 | 7 |
# it under the terms of the GNU General Public License as published by |
... | ... | |
19 | 19 |
# 02110-1301, USA. |
20 | 20 |
|
21 | 21 |
|
22 |
"""Script for doing Q&A on Ganeti
|
|
22 |
"""Script for doing QA on Ganeti.
|
|
23 | 23 |
|
24 | 24 |
You can create the required known_hosts file using ssh-keyscan. It's mandatory |
25 | 25 |
to use the full name of a node (FQDN). For security reasons, verify the keys |
... | ... | |
27 | 27 |
Example: ssh-keyscan -t rsa node{1,2,3,4}.example.com > known_hosts |
28 | 28 |
""" |
29 | 29 |
|
30 |
import os |
|
31 |
import re |
|
32 | 30 |
import sys |
33 |
import yaml |
|
34 |
import time |
|
35 |
import tempfile |
|
36 | 31 |
from datetime import datetime |
37 | 32 |
from optparse import OptionParser |
38 | 33 |
|
39 |
# I want more flexibility for testing over SSH, therefore I'm not using |
|
40 |
# Ganeti's ssh module. |
|
41 |
import subprocess |
|
34 |
import qa_cluster |
|
35 |
import qa_config |
|
36 |
import qa_daemon |
|
37 |
import qa_env |
|
38 |
import qa_instance |
|
39 |
import qa_node |
|
40 |
import qa_other |
|
42 | 41 |
|
43 |
from ganeti import utils |
|
44 |
from ganeti import constants |
|
45 | 42 |
|
46 |
# {{{ Global variables |
|
47 |
cfg = None |
|
48 |
options = None |
|
49 |
# }}} |
|
50 |
|
|
51 |
# {{{ Errors |
|
52 |
class Error(Exception): |
|
53 |
"""An error occurred during Q&A testing. |
|
54 |
|
|
55 |
""" |
|
56 |
pass |
|
57 |
|
|
58 |
|
|
59 |
class OutOfNodesError(Error): |
|
60 |
"""Out of nodes. |
|
61 |
|
|
62 |
""" |
|
63 |
pass |
|
64 |
|
|
65 |
|
|
66 |
class OutOfInstancesError(Error): |
|
67 |
"""Out of instances. |
|
68 |
|
|
69 |
""" |
|
70 |
pass |
|
71 |
# }}} |
|
72 |
|
|
73 |
# {{{ Utilities |
|
74 |
def TestEnabled(test): |
|
75 |
"""Returns True if the given test is enabled.""" |
|
76 |
return cfg.get('tests', {}).get(test, False) |
|
77 |
|
|
78 |
|
|
79 |
def RunTest(callable, *args): |
|
43 |
def RunTest(fn, *args): |
|
80 | 44 |
"""Runs a test after printing a header. |
81 | 45 |
|
82 | 46 |
""" |
83 |
if callable.__doc__:
|
|
84 |
desc = callable.__doc__.splitlines()[0].strip()
|
|
47 |
if fn.__doc__:
|
|
48 |
desc = fn.__doc__.splitlines()[0].strip()
|
|
85 | 49 |
else: |
86 |
desc = '%r' % callable
|
|
50 |
desc = '%r' % fn
|
|
87 | 51 |
|
88 | 52 |
now = str(datetime.now()) |
89 | 53 |
|
... | ... | |
92 | 56 |
print desc |
93 | 57 |
print '-' * 60 |
94 | 58 |
|
95 |
return callable(*args) |
|
96 |
|
|
97 |
|
|
98 |
def AssertEqual(first, second, msg=None):
  """Raises an error unless the two values compare equal.

  Args:
    first: First value
    second: Second value
    msg: Optional custom error message
  """
  if first == second:
    return
  raise Error(msg or '%r == %r' % (first, second))
|
104 |
|
|
105 |
|
|
106 |
def GetSSHCommand(node, cmd, strict=True): |
|
107 |
"""Builds SSH command to be executed. |
|
108 |
|
|
109 |
""" |
|
110 |
args = [ 'ssh', '-oEscapeChar=none', '-oBatchMode=yes', '-l', 'root' ] |
|
111 |
|
|
112 |
if strict: |
|
113 |
tmp = 'yes' |
|
114 |
else: |
|
115 |
tmp = 'no' |
|
116 |
args.append('-oStrictHostKeyChecking=%s' % tmp) |
|
117 |
args.append('-oClearAllForwardings=yes') |
|
118 |
args.append('-oForwardAgent=yes') |
|
119 |
args.append(node) |
|
120 |
|
|
121 |
if options.dry_run: |
|
122 |
prefix = 'exit 0; ' |
|
123 |
else: |
|
124 |
prefix = '' |
|
125 |
|
|
126 |
args.append(prefix + cmd) |
|
127 |
|
|
128 |
print 'SSH:', utils.ShellQuoteArgs(args) |
|
129 |
|
|
130 |
return args |
|
131 |
|
|
132 |
|
|
133 |
def StartSSH(node, cmd, strict=True):
  """Spawns an SSH process running the given command.

  Returns the subprocess.Popen object; callers typically wait() on it.
  """
  return subprocess.Popen(GetSSHCommand(node, cmd, strict=strict),
                          shell=False)
|
139 |
|
|
140 |
|
|
141 |
def UploadFile(node, file): |
|
142 |
"""Uploads a file to a node and returns the filename. |
|
143 |
|
|
144 |
Caller needs to remove the returned file on the node when it's not needed |
|
145 |
anymore. |
|
146 |
""" |
|
147 |
# Make sure nobody else has access to it while preserving local permissions |
|
148 |
mode = os.stat(file).st_mode & 0700 |
|
149 |
|
|
150 |
cmd = ('tmp=$(tempfile --mode %o --prefix gnt) && ' |
|
151 |
'[[ -f "${tmp}" ]] && ' |
|
152 |
'cat > "${tmp}" && ' |
|
153 |
'echo "${tmp}"') % mode |
|
154 |
|
|
155 |
f = open(file, 'r') |
|
156 |
try: |
|
157 |
p = subprocess.Popen(GetSSHCommand(node, cmd), shell=False, stdin=f, |
|
158 |
stdout=subprocess.PIPE) |
|
159 |
AssertEqual(p.wait(), 0) |
|
160 |
|
|
161 |
# Return temporary filename |
|
162 |
return p.stdout.read().strip() |
|
163 |
finally: |
|
164 |
f.close() |
|
165 |
# }}} |
|
166 |
|
|
167 |
# {{{ Config helpers |
|
168 |
def GetMasterNode():
  """Returns the master node's config entry (first node in the list)."""
  return cfg['nodes'][0]
|
170 |
|
|
171 |
|
|
172 |
def AcquireInstance():
  """Returns an instance which isn't in use.

  Marks the returned instance as used; release with ReleaseInstance().

  Raises:
    OutOfInstancesError: if every configured instance is already in use
  """
  # Filter out instances already marked as used
  instances = [inst for inst in cfg['instances']
               if not inst.get('_used', False)]

  if not instances:
    raise OutOfInstancesError("No instances left")

  inst = instances[0]
  inst['_used'] = True
  return inst
|
187 | 61 |
|
188 |
|
|
189 |
def ReleaseInstance(inst):
  """Marks an instance as no longer in use."""
  inst['_used'] = False
|
191 |
|
|
192 |
|
|
193 |
def AcquireNode(exclude=None):
  """Returns the least used node.

  Only the master and nodes already added to the cluster are eligible.
  Increments the returned node's use counter; release with ReleaseNode().

  Args:
    exclude: Optional node to exclude from selection

  Raises:
    OutOfNodesError: if no eligible node remains
  """
  master = GetMasterNode()

  # Filter out unwanted nodes (combined into one pass)
  nodes = [node for node in cfg['nodes']
           if node != exclude and (node.get('_added', False) or
                                   node == master)]

  if not nodes:
    raise OutOfNodesError("No nodes left")

  # Pick the node with the lowest use count, breaking ties by primary name
  nodes.sort(key=lambda node: (node.get('_count', 0), node['primary']))

  node = nodes[0]
  node['_count'] = node.get('_count', 0) + 1
  return node
|
225 |
|
|
226 |
|
|
227 |
def ReleaseNode(node): |
|
228 |
node['_count'] = node.get('_count', 0) - 1 |
|
229 |
# }}} |
|
230 |
|
|
231 |
# {{{ Environment tests |
|
232 |
def TestConfig():
  """Test configuration for sanity.

  Raises:
    Error: if the config lacks nodes or instances
  """
  if not cfg['nodes']:
    raise Error("Need at least one node")
  if not cfg['instances']:
    raise Error("Need at least one instance")
  # TODO: Add more checks
|
241 |
|
|
242 |
|
|
243 |
def TestSshConnection():
  """Test SSH connection.

  """
  # Every node must be reachable via passwordless SSH
  for node in cfg['nodes']:
    AssertEqual(StartSSH(node['primary'], 'exit').wait(), 0)
|
249 |
|
|
250 |
|
|
251 |
def TestGanetiCommands():
  """Test availability of Ganeti commands.

  """
  # Every Ganeti CLI tool and daemon must report its version successfully
  programs = ('gnt-cluster', 'gnt-os', 'gnt-node', 'gnt-instance',
              'gnt-backup', 'ganeti-noded', 'ganeti-watcher')

  cmd = ' && '.join([utils.ShellQuoteArgs([prog, '--version'])
                     for prog in programs])

  for node in cfg['nodes']:
    AssertEqual(StartSSH(node['primary'], cmd).wait(), 0)
|
267 |
|
|
268 |
|
|
269 |
def TestIcmpPing():
  """ICMP ping each node.

  Every node must be able to reach every primary and secondary address.
  """
  # The address list is the same for every node, so build it (and the
  # combined ping command) once instead of per outer iteration.
  check = []
  for i in cfg['nodes']:
    check.append(i['primary'])
    if 'secondary' in i:
      check.append(i['secondary'])

  ping = lambda ip: utils.ShellQuoteArgs(['ping', '-w', '3', '-c', '1', ip])
  cmd = ' && '.join([ping(i) for i in check])

  for node in cfg['nodes']:
    AssertEqual(StartSSH(node['primary'], cmd).wait(), 0)
|
284 |
# }}} |
|
285 |
|
|
286 |
# {{{ Cluster tests |
|
287 |
def TestClusterInit():
  """gnt-cluster init"""
  master = GetMasterNode()

  cmd = ['gnt-cluster', 'init']

  # Pass the secondary IP only when the master has one configured
  if master.get('secondary', None):
    cmd.append('--secondary-ip=%s' % master['secondary'])

  # A custom bridge is used both for instances and as master netdev
  if cfg.get('bridge', None):
    cmd.append('--bridge=%s' % cfg['bridge'])
    cmd.append('--master-netdev=%s' % cfg['bridge'])

  cmd.append(cfg['name'])

  AssertEqual(StartSSH(master['primary'],
                       utils.ShellQuoteArgs(cmd)).wait(), 0)
|
301 |
|
|
302 |
|
|
303 |
def TestClusterVerify():
  """gnt-cluster verify"""
  ret = StartSSH(GetMasterNode()['primary'],
                 utils.ShellQuoteArgs(['gnt-cluster', 'verify'])).wait()
  AssertEqual(ret, 0)
|
308 |
|
|
309 |
|
|
310 |
def TestClusterInfo():
  """gnt-cluster info"""
  ret = StartSSH(GetMasterNode()['primary'],
                 utils.ShellQuoteArgs(['gnt-cluster', 'info'])).wait()
  AssertEqual(ret, 0)
|
315 |
|
|
316 |
|
|
317 |
def TestClusterBurnin(): |
|
318 |
"""Burnin""" |
|
319 |
master = GetMasterNode() |
|
320 |
|
|
321 |
# Get as many instances as we need |
|
322 |
instances = [] |
|
323 |
try: |
|
324 |
for _ in xrange(0, cfg.get('options', {}).get('burnin-instances', 1)): |
|
325 |
instances.append(AcquireInstance()) |
|
326 |
except OutOfInstancesError: |
|
327 |
print "Not enough instances, continuing anyway." |
|
328 |
|
|
329 |
if len(instances) < 1: |
|
330 |
raise Error("Burnin needs at least one instance") |
|
331 |
|
|
332 |
# Run burnin |
|
333 |
try: |
|
334 |
script = UploadFile(master['primary'], '../tools/burnin') |
|
335 |
try: |
|
336 |
cmd = [script, |
|
337 |
'--os=%s' % cfg['os'], |
|
338 |
'--os-size=%s' % cfg['os-size'], |
|
339 |
'--swap-size=%s' % cfg['swap-size']] |
|
340 |
cmd += [inst['name'] for inst in instances] |
|
341 |
AssertEqual(StartSSH(master['primary'], |
|
342 |
utils.ShellQuoteArgs(cmd)).wait(), 0) |
|
343 |
finally: |
|
344 |
cmd = ['rm', '-f', script] |
|
345 |
AssertEqual(StartSSH(master['primary'], |
|
346 |
utils.ShellQuoteArgs(cmd)).wait(), 0) |
|
347 |
finally: |
|
348 |
for inst in instances: |
|
349 |
ReleaseInstance(inst) |
|
350 |
|
|
351 |
|
|
352 |
def TestClusterMasterFailover():
  """gnt-cluster masterfailover"""
  master = GetMasterNode()

  failovermaster = AcquireNode(exclude=master)
  try:
    cmd = utils.ShellQuoteArgs(['gnt-cluster', 'masterfailover'])

    # Fail over to the other node ...
    AssertEqual(StartSSH(failovermaster['primary'], cmd).wait(), 0)

    # ... and back to the original master
    AssertEqual(StartSSH(master['primary'], cmd).wait(), 0)
  finally:
    ReleaseNode(failovermaster)
|
367 |
|
|
368 |
|
|
369 |
def TestClusterCopyfile():
  """gnt-cluster copyfile"""
  master = GetMasterNode()

  # Create temporary file with some content
  f = tempfile.NamedTemporaryFile()
  f.write("I'm a testfile.\n")
  f.flush()
  f.seek(0)

  # Upload file to master node
  testname = UploadFile(master['primary'], f.name)
  try:
    # Distribute the file to all nodes
    copy_cmd = ['gnt-cluster', 'copyfile', testname]
    AssertEqual(StartSSH(master['primary'],
                         utils.ShellQuoteArgs(copy_cmd)).wait(), 0)
  finally:
    # Remove the file from all nodes again
    rm_cmd = utils.ShellQuoteArgs(['rm', '-f', testname])
    for node in cfg['nodes']:
      AssertEqual(StartSSH(node['primary'], rm_cmd).wait(), 0)
|
392 |
|
|
393 |
|
|
394 |
def TestClusterDestroy():
  """gnt-cluster destroy"""
  ret = StartSSH(GetMasterNode()['primary'],
                 utils.ShellQuoteArgs(['gnt-cluster', 'destroy',
                                       '--yes-do-it'])).wait()
  AssertEqual(ret, 0)
|
399 |
# }}} |
|
400 |
|
|
401 |
# {{{ Node tests |
|
402 |
def _NodeAdd(node):
  """Adds one node to the cluster via "gnt-node add".

  """
  if node.get('_added', False):
    raise Error("Node %s already in cluster" % node['primary'])

  cmd = ['gnt-node', 'add']
  secondary = node.get('secondary', None)
  if secondary:
    cmd.append('--secondary-ip=%s' % secondary)
  cmd.append(node['primary'])

  AssertEqual(StartSSH(GetMasterNode()['primary'],
                       utils.ShellQuoteArgs(cmd)).wait(), 0)

  # Remember the node is now part of the cluster
  node['_added'] = True
|
414 |
|
|
415 |
|
|
416 |
def TestNodeAddAll():
  """Adding all nodes to cluster."""
  master = GetMasterNode()
  # The master is already part of the cluster; add everything else
  for node in cfg['nodes']:
    if node != master:
      _NodeAdd(node)
|
422 |
|
|
423 |
|
|
424 |
def _NodeRemove(node):
  """Removes one node from the cluster via "gnt-node remove".

  """
  ret = StartSSH(GetMasterNode()['primary'],
                 utils.ShellQuoteArgs(['gnt-node', 'remove',
                                       node['primary']])).wait()
  AssertEqual(ret, 0)
  node['_added'] = False
|
429 |
|
|
430 |
|
|
431 |
def TestNodeRemoveAll():
  """Removing all nodes from cluster."""
  master = GetMasterNode()
  # The master can't be removed; drop everything else
  for node in cfg['nodes']:
    if node != master:
      _NodeRemove(node)
|
437 |
|
|
438 |
|
|
439 |
def TestNodeInfo():
  """gnt-node info"""
  ret = StartSSH(GetMasterNode()['primary'],
                 utils.ShellQuoteArgs(['gnt-node', 'info'])).wait()
  AssertEqual(ret, 0)
|
444 |
|
|
445 |
|
|
446 |
def TestNodeVolumes():
  """gnt-node volumes"""
  ret = StartSSH(GetMasterNode()['primary'],
                 utils.ShellQuoteArgs(['gnt-node', 'volumes'])).wait()
  AssertEqual(ret, 0)
|
451 |
# }}} |
|
452 |
|
|
453 |
# {{{ Instance tests |
|
454 |
def _DiskTest(node, instance, args):
  """Adds an instance with extra disk-related arguments (helper).

  Args:
    node: Primary node for the new instance
    instance: Instance config entry to create
    args: Extra command-line arguments (e.g. disk template), or None

  Returns the instance entry.
  """
  cmd = ['gnt-instance', 'add',
         '--os-type=%s' % cfg['os'],
         '--os-size=%s' % cfg['os-size'],
         '--swap-size=%s' % cfg['swap-size'],
         '--memory=%s' % cfg['mem'],
         '--node=%s' % node['primary']]
  if args:
    cmd.extend(args)
  cmd.append(instance['name'])

  AssertEqual(StartSSH(GetMasterNode()['primary'],
                       utils.ShellQuoteArgs(cmd)).wait(), 0)
  return instance
|
468 |
|
|
469 |
|
|
470 |
def TestInstanceAddWithPlainDisk(node):
  """gnt-instance add -t plain"""
  inst = AcquireInstance()
  return _DiskTest(node, inst, ['--disk-template=plain'])
|
473 |
|
|
474 |
|
|
475 |
def TestInstanceAddWithLocalMirrorDisk(node):
  """gnt-instance add -t local_raid1"""
  inst = AcquireInstance()
  return _DiskTest(node, inst, ['--disk-template=local_raid1'])
|
478 |
|
|
479 |
|
|
480 |
def TestInstanceAddWithRemoteRaidDisk(node, node2):
  """gnt-instance add -t remote_raid1"""
  inst = AcquireInstance()
  return _DiskTest(node, inst,
                   ['--disk-template=remote_raid1',
                    '--secondary-node=%s' % node2['primary']])
|
485 |
|
|
486 |
|
|
487 |
def TestInstanceRemove(instance):
  """gnt-instance remove"""
  cmd = utils.ShellQuoteArgs(['gnt-instance', 'remove', '-f',
                              instance['name']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], cmd).wait(), 0)

  # Hand the instance slot back to the pool
  ReleaseInstance(instance)
|
494 |
|
|
495 |
|
|
496 |
def TestInstanceStartup(instance):
  """gnt-instance startup"""
  cmd = utils.ShellQuoteArgs(['gnt-instance', 'startup', instance['name']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], cmd).wait(), 0)
|
501 |
|
|
502 |
|
|
503 |
def TestInstanceShutdown(instance):
  """gnt-instance shutdown"""
  cmd = utils.ShellQuoteArgs(['gnt-instance', 'shutdown', instance['name']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], cmd).wait(), 0)
|
508 |
|
|
509 |
|
|
510 |
def TestInstanceFailover(instance):
  """gnt-instance failover"""
  cmd = utils.ShellQuoteArgs(['gnt-instance', 'failover', '--force',
                              instance['name']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], cmd).wait(), 0)
|
515 |
|
|
516 |
|
|
517 |
def TestInstanceInfo(instance):
  """gnt-instance info"""
  cmd = utils.ShellQuoteArgs(['gnt-instance', 'info', instance['name']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], cmd).wait(), 0)
|
522 |
# }}} |
|
523 |
|
|
524 |
# {{{ Daemon tests |
|
525 |
def _ResolveInstanceName(instance):
  """Gets the full Xen name of an instance.

  Extracts the "Instance name:" line from "gnt-instance info" output.
  """
  master = GetMasterNode()

  cmd = '%s | %s' % (
    utils.ShellQuoteArgs(['gnt-instance', 'info', instance['name']]),
    utils.ShellQuoteArgs(['sed', '-n', '-e', 's/^Instance name: *//p']))

  p = subprocess.Popen(GetSSHCommand(master['primary'], cmd), shell=False,
                       stdout=subprocess.PIPE)
  AssertEqual(p.wait(), 0)

  return p.stdout.read().strip()
|
540 |
|
|
541 |
|
|
542 |
def _InstanceRunning(node, name):
  """Checks whether an instance is running.

  Args:
    node: Node the instance runs on
    name: Full name of Xen instance
  """
  # "xm list <name>" exits non-zero when the domain doesn't exist
  cmd = utils.ShellQuoteArgs(['xm', 'list', name]) + ' >/dev/null'
  return StartSSH(node['primary'], cmd).wait() == 0
|
552 |
|
|
553 |
|
|
554 |
def _XmShutdownInstance(node, name):
  """Shuts down instance using "xm" and waits for completion.

  Args:
    node: Node the instance runs on
    name: Full name of Xen instance

  Raises:
    Error: if the instance is still running after one minute
  """
  # "xm" only operates on local Xen domains, so the command must run on
  # the node hosting the instance — not on the master, where the domain
  # wouldn't be found.  (The status poll below already used "node".)
  cmd = ['xm', 'shutdown', name]
  AssertEqual(StartSSH(node['primary'],
                       utils.ShellQuoteArgs(cmd)).wait(), 0)

  # Wait up to a minute
  end = time.time() + 60
  while time.time() <= end:
    if not _InstanceRunning(node, name):
      break
    time.sleep(5)
  else:
    raise Error("xm shutdown failed")
|
573 |
|
|
574 |
|
|
575 |
def _ResetWatcherDaemon(node):
  """Removes the watcher daemon's state file.

  Args:
    node: Node to be reset
  """
  cmd = utils.ShellQuoteArgs(['rm', '-f', constants.WATCHER_STATEFILE])
  AssertEqual(StartSSH(node['primary'], cmd).wait(), 0)
|
584 |
|
|
585 |
|
|
586 |
def TestInstanceAutomaticRestart(node, instance):
  """Test automatic restart of instance by ganeti-watcher.

  Note: takes up to 6 minutes to complete.
  """
  master = GetMasterNode()
  inst_name = _ResolveInstanceName(instance)

  # Start from a clean watcher state, then kill the instance behind
  # Ganeti's back so the watcher has something to restart
  _ResetWatcherDaemon(node)
  _XmShutdownInstance(node, inst_name)

  # Give it a bit more than five minutes to start again
  deadline = time.time() + 330

  # Poll until the watcher has brought the instance back up
  while time.time() <= deadline:
    if _InstanceRunning(node, inst_name):
      break
    time.sleep(15)
  else:
    raise Error("Daemon didn't restart instance in time")

  info_cmd = utils.ShellQuoteArgs(['gnt-instance', 'info', inst_name])
  AssertEqual(StartSSH(master['primary'], info_cmd).wait(), 0)
|
611 |
|
|
612 |
|
|
613 |
def TestInstanceConsecutiveFailures(node, instance):
  """Test five consecutive instance failures.

  Note: takes at least 35 minutes to complete.
  """
  master = GetMasterNode()
  inst_name = _ResolveInstanceName(instance)

  _ResetWatcherDaemon(node)
  _XmShutdownInstance(node, inst_name)

  # Keep shutting the instance down for 35 minutes, so the watcher
  # eventually gives up on restarting it
  finished_at = time.time() + (35 * 60)
  while time.time() <= finished_at:
    if _InstanceRunning(node, inst_name):
      _XmShutdownInstance(node, inst_name)
    time.sleep(30)

  # Check for some time whether the instance doesn't start again
  check_until = time.time() + 330
  while time.time() <= check_until:
    if _InstanceRunning(node, inst_name):
      raise Error("Instance started when it shouldn't")
    time.sleep(30)

  info_cmd = utils.ShellQuoteArgs(['gnt-instance', 'info', inst_name])
  AssertEqual(StartSSH(master['primary'], info_cmd).wait(), 0)
|
642 |
# }}} |
|
643 |
|
|
644 |
# {{{ Other tests |
|
645 |
def TestUploadKnownHostsFile(localpath):
  """Uploading known_hosts file.

  Installs the given local file as the cluster's SSH known_hosts file
  on the master node.
  """
  master = GetMasterNode()

  tmpfile = UploadFile(master['primary'], localpath)
  try:
    mv_cmd = ['mv', tmpfile, constants.SSH_KNOWN_HOSTS_FILE]
    AssertEqual(StartSSH(master['primary'],
                         utils.ShellQuoteArgs(mv_cmd)).wait(), 0)
  except:
    # Bare "except" is deliberate: clean up the uploaded temporary file
    # on any failure (including KeyboardInterrupt), then re-raise
    rm_cmd = ['rm', '-f', tmpfile]
    AssertEqual(StartSSH(master['primary'],
                         utils.ShellQuoteArgs(rm_cmd)).wait(), 0)
    raise
|
661 |
# }}} |
|
662 |
|
|
663 |
# {{{ Main program |
|
664 |
if __name__ == '__main__': |
|
665 |
# {{{ Option parsing |
|
666 | 66 |
parser = OptionParser(usage="%prog [options] <config-file> " |
667 | 67 |
"<known-hosts-file>") |
668 | 68 |
parser.add_option('--dry-run', dest='dry_run', |
... | ... | |
671 | 71 |
parser.add_option('--yes-do-it', dest='yes_do_it', |
672 | 72 |
action="store_true", |
673 | 73 |
help="Really execute the tests") |
674 |
(options, args) = parser.parse_args() |
|
675 |
# }}} |
|
74 |
(qa_config.options, args) = parser.parse_args() |
|
676 | 75 |
|
677 | 76 |
if len(args) == 2: |
678 | 77 |
(config_file, known_hosts_file) = args |
679 | 78 |
else: |
680 | 79 |
parser.error("Not enough arguments.") |
681 | 80 |
|
682 |
if not options.yes_do_it: |
|
81 |
if not qa_config.options.yes_do_it:
|
|
683 | 82 |
print ("Executing this script irreversibly destroys any Ganeti\n" |
684 | 83 |
"configuration on all nodes involved. If you really want\n" |
685 | 84 |
"to start testing, supply the --yes-do-it option.") |
686 | 85 |
sys.exit(1) |
687 | 86 |
|
688 |
f = open(config_file, 'r') |
|
689 |
try: |
|
690 |
cfg = yaml.load(f.read()) |
|
691 |
finally: |
|
692 |
f.close() |
|
693 |
|
|
694 |
RunTest(TestConfig) |
|
87 |
qa_config.Load(config_file) |
|
695 | 88 |
|
696 |
RunTest(TestUploadKnownHostsFile, known_hosts_file) |
|
89 |
RunTest(qa_other.TestUploadKnownHostsFile, known_hosts_file)
|
|
697 | 90 |
|
698 |
if TestEnabled('env'): |
|
699 |
RunTest(TestSshConnection) |
|
700 |
RunTest(TestIcmpPing) |
|
701 |
RunTest(TestGanetiCommands) |
|
91 |
if qa_config.TestEnabled('env'):
|
|
92 |
RunTest(qa_env.TestSshConnection)
|
|
93 |
RunTest(qa_env.TestIcmpPing)
|
|
94 |
RunTest(qa_env.TestGanetiCommands)
|
|
702 | 95 |
|
703 |
RunTest(TestClusterInit) |
|
96 |
RunTest(qa_cluster.TestClusterInit)
|
|
704 | 97 |
|
705 |
RunTest(TestNodeAddAll) |
|
98 |
RunTest(qa_node.TestNodeAddAll)
|
|
706 | 99 |
|
707 |
if TestEnabled('cluster-verify'): |
|
708 |
RunTest(TestClusterVerify) |
|
100 |
if qa_config.TestEnabled('cluster-verify'):
|
|
101 |
RunTest(qa_cluster.TestClusterVerify)
|
|
709 | 102 |
|
710 |
if TestEnabled('cluster-info'): |
|
711 |
RunTest(TestClusterInfo) |
|
103 |
if qa_config.TestEnabled('cluster-info'):
|
|
104 |
RunTest(qa_cluster.TestClusterInfo)
|
|
712 | 105 |
|
713 |
if TestEnabled('cluster-copyfile'): |
|
714 |
RunTest(TestClusterCopyfile) |
|
106 |
if qa_config.TestEnabled('cluster-copyfile'):
|
|
107 |
RunTest(qa_cluster.TestClusterCopyfile)
|
|
715 | 108 |
|
716 |
if TestEnabled('node-info'): |
|
717 |
RunTest(TestNodeInfo) |
|
109 |
if qa_config.TestEnabled('node-info'):
|
|
110 |
RunTest(qa_node.TestNodeInfo)
|
|
718 | 111 |
|
719 |
if TestEnabled('cluster-burnin'): |
|
720 |
RunTest(TestClusterBurnin) |
|
112 |
if qa_config.TestEnabled('cluster-burnin'):
|
|
113 |
RunTest(qa_cluster.TestClusterBurnin)
|
|
721 | 114 |
|
722 |
if TestEnabled('cluster-master-failover'): |
|
723 |
RunTest(TestClusterMasterFailover) |
|
115 |
if qa_config.TestEnabled('cluster-master-failover'):
|
|
116 |
RunTest(qa_cluster.TestClusterMasterFailover)
|
|
724 | 117 |
|
725 |
node = AcquireNode() |
|
118 |
node = qa_config.AcquireNode()
|
|
726 | 119 |
try: |
727 |
if TestEnabled('instance-add-plain-disk'): |
|
728 |
instance = RunTest(TestInstanceAddWithPlainDisk, node) |
|
729 |
RunTest(TestInstanceShutdown, instance) |
|
730 |
RunTest(TestInstanceStartup, instance) |
|
120 |
if qa_config.TestEnabled('instance-add-plain-disk'):
|
|
121 |
instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, node)
|
|
122 |
RunTest(qa_instance.TestInstanceShutdown, instance)
|
|
123 |
RunTest(qa_instance.TestInstanceStartup, instance)
|
|
731 | 124 |
|
732 |
if TestEnabled('instance-info'): |
|
733 |
RunTest(TestInstanceInfo, instance) |
|
125 |
if qa_config.TestEnabled('instance-info'):
|
|
126 |
RunTest(qa_instance.TestInstanceInfo, instance)
|
|
734 | 127 |
|
735 |
if TestEnabled('instance-automatic-restart'): |
|
736 |
RunTest(TestInstanceAutomaticRestart, node, instance) |
|
128 |
if qa_config.TestEnabled('instance-automatic-restart'):
|
|
129 |
RunTest(qa_daemon.TestInstanceAutomaticRestart, node, instance)
|
|
737 | 130 |
|
738 |
if TestEnabled('instance-consecutive-failures'): |
|
739 |
RunTest(TestInstanceConsecutiveFailures, node, instance) |
|
131 |
if qa_config.TestEnabled('instance-consecutive-failures'):
|
|
132 |
RunTest(qa_daemon.TestInstanceConsecutiveFailures, node, instance)
|
|
740 | 133 |
|
741 |
if TestEnabled('node-volumes'): |
|
742 |
RunTest(TestNodeVolumes) |
|
134 |
if qa_config.TestEnabled('node-volumes'):
|
|
135 |
RunTest(qa_node.TestNodeVolumes)
|
|
743 | 136 |
|
744 |
RunTest(TestInstanceRemove, instance) |
|
137 |
RunTest(qa_instance.TestInstanceRemove, instance)
|
|
745 | 138 |
del instance |
746 | 139 |
|
747 |
if TestEnabled('instance-add-local-mirror-disk'): |
|
748 |
instance = RunTest(TestInstanceAddWithLocalMirrorDisk, node) |
|
749 |
RunTest(TestInstanceShutdown, instance) |
|
750 |
RunTest(TestInstanceStartup, instance) |
|
140 |
if qa_config.TestEnabled('instance-add-local-mirror-disk'):
|
|
141 |
instance = RunTest(qa_instance.TestInstanceAddWithLocalMirrorDisk, node)
|
|
142 |
RunTest(qa_instance.TestInstanceShutdown, instance)
|
|
143 |
RunTest(qa_instance.TestInstanceStartup, instance)
|
|
751 | 144 |
|
752 |
if TestEnabled('instance-info'): |
|
753 |
RunTest(TestInstanceInfo, instance) |
|
145 |
if qa_config.TestEnabled('instance-info'):
|
|
146 |
RunTest(qa_instance.TestInstanceInfo, instance)
|
|
754 | 147 |
|
755 |
if TestEnabled('node-volumes'): |
|
756 |
RunTest(TestNodeVolumes) |
|
148 |
if qa_config.TestEnabled('node-volumes'):
|
|
149 |
RunTest(qa_node.TestNodeVolumes)
|
|
757 | 150 |
|
758 |
RunTest(TestInstanceRemove, instance) |
|
151 |
RunTest(qa_instance.TestInstanceRemove, instance)
|
|
759 | 152 |
del instance |
760 | 153 |
|
761 |
if TestEnabled('instance-add-remote-raid-disk'): |
|
762 |
node2 = AcquireNode(exclude=node) |
|
154 |
if qa_config.TestEnabled('instance-add-remote-raid-disk'):
|
|
155 |
node2 = qa_config.AcquireNode(exclude=node)
|
|
763 | 156 |
try: |
764 |
instance = RunTest(TestInstanceAddWithRemoteRaidDisk, node, node2) |
|
765 |
RunTest(TestInstanceShutdown, instance) |
|
766 |
RunTest(TestInstanceStartup, instance) |
|
157 |
instance = RunTest(qa_instance.TestInstanceAddWithRemoteRaidDisk, |
|
158 |
node, node2) |
|
159 |
RunTest(qa_instance.TestInstanceShutdown, instance) |
|
160 |
RunTest(qa_instance.TestInstanceStartup, instance) |
|
767 | 161 |
|
768 |
if TestEnabled('instance-info'): |
|
769 |
RunTest(TestInstanceInfo, instance) |
|
162 |
if qa_config.TestEnabled('instance-info'):
|
|
163 |
RunTest(qa_instance.TestInstanceInfo, instance)
|
|
770 | 164 |
|
771 |
if TestEnabled('instance-failover'): |
|
772 |
RunTest(TestInstanceFailover, instance) |
|
165 |
if qa_config.TestEnabled('instance-failover'):
|
|
166 |
RunTest(qa_instance.TestInstanceFailover, instance)
|
|
773 | 167 |
|
774 |
if TestEnabled('node-volumes'): |
|
775 |
RunTest(TestNodeVolumes) |
|
168 |
if qa_config.TestEnabled('node-volumes'):
|
|
169 |
RunTest(qa_node.TestNodeVolumes)
|
|
776 | 170 |
|
777 |
RunTest(TestInstanceRemove, instance) |
|
171 |
RunTest(qa_instance.TestInstanceRemove, instance)
|
|
778 | 172 |
del instance |
779 | 173 |
finally: |
780 |
ReleaseNode(node2) |
|
174 |
qa_config.ReleaseNode(node2)
|
|
781 | 175 |
|
782 | 176 |
finally: |
783 |
ReleaseNode(node) |
|
177 |
qa_config.ReleaseNode(node)
|
|
784 | 178 |
|
785 |
RunTest(TestNodeRemoveAll) |
|
179 |
RunTest(qa_node.TestNodeRemoveAll)
|
|
786 | 180 |
|
787 |
if TestEnabled('cluster-destroy'): |
|
788 |
RunTest(TestClusterDestroy) |
|
789 |
# }}} |
|
181 |
if qa_config.TestEnabled('cluster-destroy'): |
|
182 |
RunTest(qa_cluster.TestClusterDestroy) |
|
790 | 183 |
|
791 |
# vim: foldmethod=marker : |
|
184 |
|
|
185 |
if __name__ == '__main__': |
|
186 |
main() |
Also available in: Unified diff