root / qa / ganeti-qa.py @ 44c23fa6
History | View | Annotate | Download (20.2 kB)
1 |
#!/usr/bin/python
|
---|---|
2 |
#
|
3 |
|
4 |
# Copyright (C) 2006, 2007 Google Inc.
|
5 |
#
|
6 |
# This program is free software; you can redistribute it and/or modify
|
7 |
# it under the terms of the GNU General Public License as published by
|
8 |
# the Free Software Foundation; either version 2 of the License, or
|
9 |
# (at your option) any later version.
|
10 |
#
|
11 |
# This program is distributed in the hope that it will be useful, but
|
12 |
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
# General Public License for more details.
|
15 |
#
|
16 |
# You should have received a copy of the GNU General Public License
|
17 |
# along with this program; if not, write to the Free Software
|
18 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
19 |
# 02110-1301, USA.
|
20 |
|
21 |
|
22 |
"""Script for doing Q&A on Ganeti
|
23 |
|
24 |
You can create the required known_hosts file using ssh-keyscan. It's mandatory
|
25 |
to use the full name of a node (FQDN). For security reasons, verify the keys
|
26 |
before using them.
|
27 |
Example: ssh-keyscan -t rsa node{1,2,3,4}.example.com > known_hosts
|
28 |
"""
|
29 |
|
30 |
import os |
31 |
import re |
32 |
import sys |
33 |
import yaml |
34 |
import time |
35 |
import tempfile |
36 |
from datetime import datetime |
37 |
from optparse import OptionParser |
38 |
|
39 |
# I want more flexibility for testing over SSH, therefore I'm not using
|
40 |
# Ganeti's ssh module.
|
41 |
import subprocess |
42 |
|
43 |
from ganeti import utils |
44 |
from ganeti import constants |
45 |
|
46 |
# {{{ Global variables
|
47 |
cfg = None      # parsed YAML test configuration (dict); assigned in the main program
options = None  # parsed command-line options; assigned in the main program
|
49 |
# }}}
|
50 |
|
51 |
# {{{ Errors
|
52 |
class Error(Exception):
  """Base class for all errors raised during Q&A testing."""
  pass
|
57 |
|
58 |
|
59 |
class OutOfNodesError(Error):
  """Raised when no more unused nodes are available."""
  pass
|
64 |
|
65 |
|
66 |
class OutOfInstancesError(Error):
  """Raised when no more unused instances are available."""
  pass
|
71 |
# }}}
|
72 |
|
73 |
# {{{ Utilities
|
74 |
def TestEnabled(test):
  """Returns True if the given test is enabled in the configuration."""
  # Missing 'tests' section or missing key both mean "disabled"
  tests = cfg.get('tests', {})
  return tests.get(test, False)
77 |
|
78 |
|
79 |
def RunTest(callable, *args): |
80 |
"""Runs a test after printing a header.
|
81 |
|
82 |
"""
|
83 |
if callable.__doc__: |
84 |
desc = callable.__doc__.splitlines()[0].strip() |
85 |
else:
|
86 |
desc = '%r' % callable |
87 |
|
88 |
now = str(datetime.now())
|
89 |
|
90 |
print
|
91 |
print '---', now, ('-' * (55 - len(now))) |
92 |
print desc
|
93 |
print '-' * 60 |
94 |
|
95 |
return callable(*args) |
96 |
|
97 |
|
98 |
def AssertEqual(first, second, msg=None):
  """Raises an Error unless the two values compare equal.

  Args:
    first: Actual value
    second: Expected value
    msg: Optional message overriding the default description
  """
  # Deliberately uses == (not !=) so only __eq__ is consulted
  if first == second:
    return
  raise Error(msg or '%r == %r' % (first, second))
104 |
|
105 |
|
106 |
def GetSSHCommand(node, cmd, strict=True): |
107 |
"""Builds SSH command to be executed.
|
108 |
|
109 |
"""
|
110 |
args = [ 'ssh', '-oEscapeChar=none', '-oBatchMode=yes', '-l', 'root' ] |
111 |
|
112 |
if strict:
|
113 |
tmp = 'yes'
|
114 |
else:
|
115 |
tmp = 'no'
|
116 |
args.append('-oStrictHostKeyChecking=%s' % tmp)
|
117 |
args.append('-oClearAllForwardings=yes')
|
118 |
args.append('-oForwardAgent=yes')
|
119 |
args.append(node) |
120 |
|
121 |
if options.dry_run:
|
122 |
prefix = 'exit 0; '
|
123 |
else:
|
124 |
prefix = ''
|
125 |
|
126 |
args.append(prefix + cmd) |
127 |
|
128 |
print 'SSH:', utils.ShellQuoteArgs(args) |
129 |
|
130 |
return args
|
131 |
|
132 |
|
133 |
def StartSSH(node, cmd, strict=True):
  """Starts an SSH process running the given command on a node.

  Returns:
    The subprocess.Popen object; callers typically .wait() on it.
  """
  return subprocess.Popen(GetSSHCommand(node, cmd, strict=strict),
                          shell=False)
139 |
|
140 |
|
141 |
def UploadFile(node, file):
  """Uploads a file to a node and returns the filename.

  Caller needs to remove the returned file on the node when it's not needed
  anymore.
  """
  # NOTE(review): parameter name shadows the `file` builtin; kept for
  # backward compatibility with keyword callers.
  # Make sure nobody else has access to it while preserving local permissions
  # (mask keeps only the owner bits of the local file's mode)
  mode = os.stat(file).st_mode & 0700

  # Remote shell snippet: create a private temp file, verify it exists,
  # write stdin into it and echo its path back so we can return it
  cmd = ('tmp=$(tempfile --mode %o --prefix gnt) && '
         '[[ -f "${tmp}" ]] && '
         'cat > "${tmp}" && '
         'echo "${tmp}"') % mode

  f = open(file, 'r')
  try:
    # The local file is streamed over SSH as the remote command's stdin
    p = subprocess.Popen(GetSSHCommand(node, cmd), shell=False, stdin=f,
                         stdout=subprocess.PIPE)
    AssertEqual(p.wait(), 0)

    # Return temporary filename
    return p.stdout.read().strip()
  finally:
    f.close()
165 |
# }}}
|
166 |
|
167 |
# {{{ Config helpers
|
168 |
def GetMasterNode():
  """Returns the master node (by convention, first node in the config)."""
  return cfg['nodes'][0]
170 |
|
171 |
|
172 |
def AcquireInstance():
  """Returns an instance which isn't in use and marks it as used.

  Raises:
    OutOfInstancesError: If every configured instance is already in use.
  """
  # Collect instances not yet marked as used
  unused = [inst for inst in cfg['instances']
            if not inst.get('_used', False)]

  if not unused:
    raise OutOfInstancesError("No instances left")

  inst = unused[0]
  inst['_used'] = True
  return inst
|
187 |
|
188 |
|
189 |
def ReleaseInstance(inst):
  """Marks the given instance as no longer in use."""
  inst['_used'] = False
191 |
|
192 |
|
193 |
def AcquireNode(exclude=None):
  """Returns the least used node and increases its usage counter.

  Only nodes that were added to the cluster (flagged ``_added``) or the
  master node itself are eligible.

  Args:
    exclude: Node to exclude from the selection, or None

  Raises:
    OutOfNodesError: If no eligible node is left.
  """
  master = GetMasterNode()

  # Filter out unwanted nodes (excluded one, and nodes not in the cluster)
  # Note: uses ==/!= like the original, not identity, so equal configs match
  nodes = [node for node in cfg['nodes']
           if node != exclude and
           (node.get('_added', False) or node == master)]

  if not nodes:
    raise OutOfNodesError("No nodes left")

  # Least used node first; ties are broken by primary name.  A key
  # function replaces the old cmp-based comparator with identical
  # ordering (tuple comparison: count first, then primary).
  nodes.sort(key=lambda node: (node.get('_count', 0), node['primary']))

  node = nodes[0]
  node['_count'] = node.get('_count', 0) + 1
  return node
|
225 |
|
226 |
|
227 |
def ReleaseNode(node):
  """Decreases the node's usage counter again."""
  node['_count'] = node.get('_count', 0) - 1
229 |
# }}}
|
230 |
|
231 |
# {{{ Environment tests
|
232 |
def TestConfig():
  """Test configuration for sanity.

  Raises:
    Error: If the configuration lacks nodes or instances.
  """
  if not cfg['nodes']:
    raise Error("Need at least one node")
  if not cfg['instances']:
    raise Error("Need at least one instance")
  # TODO: Add more checks
|
241 |
|
242 |
|
243 |
def TestSshConnection():
  """Test SSH connection.

  Runs a trivial command on every node to verify passwordless access.
  """
  for node in cfg['nodes']:
    ret = StartSSH(node['primary'], 'exit').wait()
    AssertEqual(ret, 0)
249 |
|
250 |
|
251 |
def TestGanetiCommands():
  """Test availability of Ganeti commands.

  Runs each tool with --version on every node, chained in one SSH call.
  """
  version_cmds = [
    ['gnt-cluster', '--version'],
    ['gnt-os', '--version'],
    ['gnt-node', '--version'],
    ['gnt-instance', '--version'],
    ['gnt-backup', '--version'],
    ['ganeti-noded', '--version'],
    ['ganeti-watcher', '--version'],
    ]

  combined = ' && '.join(map(utils.ShellQuoteArgs, version_cmds))

  for node in cfg['nodes']:
    AssertEqual(StartSSH(node['primary'], combined).wait(), 0)
267 |
|
268 |
|
269 |
def TestIcmpPing():
  """ICMP ping each node.

  Every node pings the primary (and, if set, secondary) address of every
  node, including itself.
  """
  # The list of addresses and the combined ping command are identical for
  # every node, so build them once instead of on each loop iteration
  # (the original rebuilt them per node).
  check = []
  for i in cfg['nodes']:
    check.append(i['primary'])
    # `in` replaces the deprecated dict.has_key()
    if 'secondary' in i:
      check.append(i['secondary'])

  ping = lambda ip: utils.ShellQuoteArgs(['ping', '-w', '3', '-c', '1', ip])
  cmd = ' && '.join([ping(i) for i in check])

  for node in cfg['nodes']:
    AssertEqual(StartSSH(node['primary'], cmd).wait(), 0)
284 |
# }}}
|
285 |
|
286 |
# {{{ Cluster tests
|
287 |
def TestClusterInit():
  """gnt-cluster init"""
  master = GetMasterNode()

  init_cmd = ['gnt-cluster', 'init']

  if master.get('secondary', None):
    init_cmd.append('--secondary-ip=%s' % master['secondary'])

  bridge = cfg.get('bridge', None)
  if bridge:
    # The bridge doubles as the master network device
    init_cmd.append('--bridge=%s' % bridge)
    init_cmd.append('--master-netdev=%s' % bridge)

  init_cmd.append(cfg['name'])

  AssertEqual(StartSSH(master['primary'],
                       utils.ShellQuoteArgs(init_cmd)).wait(), 0)
|
301 |
|
302 |
|
303 |
def TestClusterVerify():
  """gnt-cluster verify"""
  verify_cmd = utils.ShellQuoteArgs(['gnt-cluster', 'verify'])
  AssertEqual(StartSSH(GetMasterNode()['primary'], verify_cmd).wait(), 0)
|
308 |
|
309 |
|
310 |
def TestClusterInfo():
  """gnt-cluster info"""
  info_cmd = utils.ShellQuoteArgs(['gnt-cluster', 'info'])
  AssertEqual(StartSSH(GetMasterNode()['primary'], info_cmd).wait(), 0)
|
315 |
|
316 |
|
317 |
def TestClusterBurnin():
  """Burnin"""
  master = GetMasterNode()

  # Get as many instances as we need; fewer than requested is tolerated
  instances = []
  try:
    for _ in xrange(0, cfg.get('options', {}).get('burnin-instances', 1)):
      instances.append(AcquireInstance())
  except OutOfInstancesError:
    print "Not enough instances, continuing anyway."

  if len(instances) < 1:
    raise Error("Burnin needs at least one instance")

  # Run burnin; the nested try/finally guarantees the uploaded script is
  # deleted and all acquired instances are released even on failure
  try:
    script = UploadFile(master['primary'], '../tools/burnin')
    try:
      cmd = [script,
             '--os=%s' % cfg['os'],
             '--os-size=%s' % cfg['os-size'],
             '--swap-size=%s' % cfg['swap-size']]
      cmd += [inst['name'] for inst in instances]
      AssertEqual(StartSSH(master['primary'],
                           utils.ShellQuoteArgs(cmd)).wait(), 0)
    finally:
      # Remove the uploaded burnin script from the master again
      cmd = ['rm', '-f', script]
      AssertEqual(StartSSH(master['primary'],
                           utils.ShellQuoteArgs(cmd)).wait(), 0)
  finally:
    for inst in instances:
      ReleaseInstance(inst)
350 |
|
351 |
|
352 |
def TestClusterMasterFailover():
  """gnt-cluster masterfailover"""
  master = GetMasterNode()
  failovermaster = AcquireNode(exclude=master)

  failover_cmd = utils.ShellQuoteArgs(['gnt-cluster', 'masterfailover'])
  try:
    # Fail over to the other node ...
    AssertEqual(StartSSH(failovermaster['primary'],
                         failover_cmd).wait(), 0)
    # ... and back to the original master
    AssertEqual(StartSSH(master['primary'],
                         failover_cmd).wait(), 0)
  finally:
    ReleaseNode(failovermaster)
367 |
|
368 |
|
369 |
def TestClusterCopyfile():
  """gnt-cluster copyfile"""
  master = GetMasterNode()

  # Create temporary file; it must stay open (and thus on disk) until the
  # upload below has read it — NamedTemporaryFile deletes on close
  f = tempfile.NamedTemporaryFile()
  f.write("I'm a testfile.\n")
  f.flush()
  f.seek(0)

  # Upload file to master node
  testname = UploadFile(master['primary'], f.name)
  try:
    # Copy file to all nodes
    cmd = ['gnt-cluster', 'copyfile', testname]
    AssertEqual(StartSSH(master['primary'],
                         utils.ShellQuoteArgs(cmd)).wait(), 0)
  finally:
    # Remove file from all nodes
    for node in cfg['nodes']:
      cmd = ['rm', '-f', testname]
      AssertEqual(StartSSH(node['primary'],
                           utils.ShellQuoteArgs(cmd)).wait(), 0)
|
392 |
|
393 |
|
394 |
def TestClusterDestroy():
  """gnt-cluster destroy"""
  destroy_cmd = utils.ShellQuoteArgs(['gnt-cluster', 'destroy',
                                      '--yes-do-it'])
  AssertEqual(StartSSH(GetMasterNode()['primary'], destroy_cmd).wait(), 0)
|
399 |
# }}}
|
400 |
|
401 |
# {{{ Node tests
|
402 |
def _NodeAdd(node):
  """Adds a single node to the cluster and marks it as added."""
  # Adding a node twice would corrupt the bookkeeping
  if node.get('_added', False):
    raise Error("Node %s already in cluster" % node['primary'])

  add_cmd = ['gnt-node', 'add']
  if node.get('secondary', None):
    add_cmd.append('--secondary-ip=%s' % node['secondary'])
  add_cmd.append(node['primary'])

  AssertEqual(StartSSH(GetMasterNode()['primary'],
                       utils.ShellQuoteArgs(add_cmd)).wait(), 0)

  node['_added'] = True
414 |
|
415 |
|
416 |
def TestNodeAddAll():
  """Adding all nodes to cluster."""
  master = GetMasterNode()
  # The master is part of the cluster from "gnt-cluster init" already
  for node in cfg['nodes']:
    if node == master:
      continue
    _NodeAdd(node)
422 |
|
423 |
|
424 |
def _NodeRemove(node):
  """Removes a single node from the cluster and clears its added flag."""
  rm_cmd = utils.ShellQuoteArgs(['gnt-node', 'remove', node['primary']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], rm_cmd).wait(), 0)
  node['_added'] = False
429 |
|
430 |
|
431 |
def TestNodeRemoveAll():
  """Removing all nodes from cluster."""
  master = GetMasterNode()
  # The master cannot remove itself
  for node in cfg['nodes']:
    if node == master:
      continue
    _NodeRemove(node)
437 |
|
438 |
|
439 |
def TestNodeInfo():
  """gnt-node info"""
  info_cmd = utils.ShellQuoteArgs(['gnt-node', 'info'])
  AssertEqual(StartSSH(GetMasterNode()['primary'], info_cmd).wait(), 0)
|
444 |
|
445 |
|
446 |
def TestNodeVolumes():
  """gnt-node volumes"""
  volumes_cmd = utils.ShellQuoteArgs(['gnt-node', 'volumes'])
  AssertEqual(StartSSH(GetMasterNode()['primary'], volumes_cmd).wait(), 0)
|
451 |
# }}}
|
452 |
|
453 |
# {{{ Instance tests
|
454 |
def _DiskTest(node, instance, args):
  """Creates an instance on the given node and returns it.

  Args:
    node: Primary node for the new instance
    instance: Instance config entry (its name is used)
    args: Extra gnt-instance arguments (e.g. disk template), or None
  """
  add_cmd = ['gnt-instance', 'add',
             '--os-type=%s' % cfg['os'],
             '--os-size=%s' % cfg['os-size'],
             '--swap-size=%s' % cfg['swap-size'],
             '--memory=%s' % cfg['mem'],
             '--node=%s' % node['primary']]
  if args:
    add_cmd.extend(args)
  add_cmd.append(instance['name'])

  AssertEqual(StartSSH(GetMasterNode()['primary'],
                       utils.ShellQuoteArgs(add_cmd)).wait(), 0)
  return instance
|
468 |
|
469 |
|
470 |
def TestInstanceAddWithPlainDisk(node):
  """gnt-instance add -t plain"""
  inst = AcquireInstance()
  return _DiskTest(node, inst, ['--disk-template=plain'])
473 |
|
474 |
|
475 |
def TestInstanceAddWithLocalMirrorDisk(node):
  """gnt-instance add -t local_raid1"""
  inst = AcquireInstance()
  return _DiskTest(node, inst, ['--disk-template=local_raid1'])
478 |
|
479 |
|
480 |
def TestInstanceAddWithRemoteRaidDisk(node, node2):
  """gnt-instance add -t remote_raid1"""
  inst = AcquireInstance()
  extra_args = ['--disk-template=remote_raid1',
                '--secondary-node=%s' % node2['primary']]
  return _DiskTest(node, inst, extra_args)
485 |
|
486 |
|
487 |
def TestInstanceRemove(instance):
  """gnt-instance remove"""
  rm_cmd = utils.ShellQuoteArgs(['gnt-instance', 'remove', '-f',
                                 instance['name']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], rm_cmd).wait(), 0)

  # Hand the instance slot back to the pool
  ReleaseInstance(instance)
494 |
|
495 |
|
496 |
def TestInstanceStartup(instance):
  """gnt-instance startup"""
  startup_cmd = utils.ShellQuoteArgs(['gnt-instance', 'startup',
                                      instance['name']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], startup_cmd).wait(), 0)
|
501 |
|
502 |
|
503 |
def TestInstanceShutdown(instance):
  """gnt-instance shutdown"""
  shutdown_cmd = utils.ShellQuoteArgs(['gnt-instance', 'shutdown',
                                       instance['name']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], shutdown_cmd).wait(), 0)
|
508 |
|
509 |
|
510 |
def TestInstanceFailover(instance):
  """gnt-instance failover"""
  failover_cmd = utils.ShellQuoteArgs(['gnt-instance', 'failover',
                                       '--force', instance['name']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], failover_cmd).wait(), 0)
|
515 |
|
516 |
|
517 |
def TestInstanceInfo(instance):
  """gnt-instance info"""
  info_cmd = utils.ShellQuoteArgs(['gnt-instance', 'info',
                                   instance['name']])
  AssertEqual(StartSSH(GetMasterNode()['primary'], info_cmd).wait(), 0)
|
522 |
# }}}
|
523 |
|
524 |
# {{{ Daemon tests
|
525 |
def _ResolveInstanceName(instance):
  """Gets the full Xen name of an instance.

  Extracts the "Instance name:" line from "gnt-instance info" output by
  piping it through sed on the master node.
  """
  master = GetMasterNode()

  pipeline = '%s | %s' % (
    utils.ShellQuoteArgs(['gnt-instance', 'info', instance['name']]),
    utils.ShellQuoteArgs(['sed', '-n', '-e', 's/^Instance name: *//p']))

  proc = subprocess.Popen(GetSSHCommand(master['primary'], pipeline),
                          shell=False, stdout=subprocess.PIPE)
  AssertEqual(proc.wait(), 0)

  return proc.stdout.read().strip()
|
540 |
|
541 |
|
542 |
def _InstanceRunning(node, name):
  """Checks whether an instance is running.

  Args:
    node: Node the instance runs on
    name: Full name of Xen instance

  Returns:
    True if "xm list <name>" succeeds on the node, False otherwise.
  """
  # "xm list <name>" exits non-zero when the domain doesn't exist
  list_cmd = utils.ShellQuoteArgs(['xm', 'list', name]) + ' >/dev/null'
  return StartSSH(node['primary'], list_cmd).wait() == 0
552 |
|
553 |
|
554 |
def _XmShutdownInstance(node, name):
  """Shuts down instance using "xm" and waits for completion.

  Args:
    node: Node the instance runs on
    name: Full name of Xen instance

  Raises:
    Error: If the instance is still listed after waiting for a minute.
  """
  # Fix: "xm shutdown" must run on the node hosting the domain; the
  # original contacted the master, which only worked when the instance
  # happened to run there (the status poll below already used `node`).
  cmd = ['xm', 'shutdown', name]
  AssertEqual(StartSSH(node['primary'],
                       utils.ShellQuoteArgs(cmd)).wait(), 0)

  # Wait up to a minute
  end = time.time() + 60
  while time.time() <= end:
    if not _InstanceRunning(node, name):
      break
    time.sleep(5)
  else:
    raise Error("xm shutdown failed")
573 |
|
574 |
|
575 |
def _ResetWatcherDaemon(node):
  """Removes the watcher daemon's state file.

  Args:
    node: Node to be reset
  """
  rm_cmd = utils.ShellQuoteArgs(['rm', '-f', constants.WATCHER_STATEFILE])
  AssertEqual(StartSSH(node['primary'], rm_cmd).wait(), 0)
|
584 |
|
585 |
|
586 |
def TestInstanceAutomaticRestart(node, instance):
  """Test automatic restart of instance by ganeti-watcher.

  Note: takes up to 6 minutes to complete.
  """
  master = GetMasterNode()
  inst_name = _ResolveInstanceName(instance)

  # Clear the watcher's state so it treats the shutdown as fresh,
  # then kill the instance from under Ganeti via xm
  _ResetWatcherDaemon(node)
  _XmShutdownInstance(node, inst_name)

  # Give it a bit more than five minutes to start again
  restart_at = time.time() + 330

  # Wait until it's running again; the while/else raises if the
  # deadline passes without the break firing
  while time.time() <= restart_at:
    if _InstanceRunning(node, inst_name):
      break
    time.sleep(15)
  else:
    raise Error("Daemon didn't restart instance in time")

  # Sanity-check that Ganeti still knows about the instance
  cmd = ['gnt-instance', 'info', inst_name]
  AssertEqual(StartSSH(master['primary'],
                       utils.ShellQuoteArgs(cmd)).wait(), 0)
|
611 |
|
612 |
|
613 |
def TestInstanceConsecutiveFailures(node, instance):
  """Test five consecutive instance failures.

  The watcher should eventually give up restarting a repeatedly failing
  instance; this keeps shutting it down and then verifies it stays down.

  Note: takes at least 35 minutes to complete.
  """
  master = GetMasterNode()
  inst_name = _ResolveInstanceName(instance)

  _ResetWatcherDaemon(node)
  _XmShutdownInstance(node, inst_name)

  # Do shutdowns for 30 minutes
  # NOTE(review): comment says 30 minutes but the deadline is 35*60 —
  # confirm which is intended
  finished_at = time.time() + (35 * 60)

  while time.time() <= finished_at:
    if _InstanceRunning(node, inst_name):
      _XmShutdownInstance(node, inst_name)
    time.sleep(30)

  # Check for some time whether the instance doesn't start again
  check_until = time.time() + 330
  while time.time() <= check_until:
    if _InstanceRunning(node, inst_name):
      raise Error("Instance started when it shouldn't")
    time.sleep(30)

  # Ganeti must still know about the instance even though it's down
  cmd = ['gnt-instance', 'info', inst_name]
  AssertEqual(StartSSH(master['primary'],
                       utils.ShellQuoteArgs(cmd)).wait(), 0)
|
642 |
# }}}
|
643 |
|
644 |
# {{{ Other tests
|
645 |
def TestUploadKnownHostsFile(localpath):
  """Uploading known_hosts file.

  Uploads the local file to a temporary location on the master and moves
  it into place as the cluster-wide SSH known_hosts file.
  """
  master = GetMasterNode()

  tmpfile = UploadFile(master['primary'], localpath)
  try:
    cmd = ['mv', tmpfile, constants.SSH_KNOWN_HOSTS_FILE]
    AssertEqual(StartSSH(master['primary'],
                         utils.ShellQuoteArgs(cmd)).wait(), 0)
  except:
    # Deliberately bare: on *any* failure (including KeyboardInterrupt)
    # remove the leftover temporary file, then re-raise
    cmd = ['rm', '-f', tmpfile]
    AssertEqual(StartSSH(master['primary'],
                         utils.ShellQuoteArgs(cmd)).wait(), 0)
    raise
|
661 |
# }}}
|
662 |
|
663 |
# {{{ Main program
|
664 |
if __name__ == '__main__':
  # {{{ Option parsing
  parser = OptionParser(usage="%prog [options] <config-file> "
                              "<known-hosts-file>")
  parser.add_option('--dry-run', dest='dry_run',
                    action="store_true",
                    help="Show what would be done")
  parser.add_option('--yes-do-it', dest='yes_do_it',
                    action="store_true",
                    help="Really execute the tests")
  (options, args) = parser.parse_args()
  # }}}

  if len(args) == 2:
    (config_file, known_hosts_file) = args
  else:
    # parser.error() exits the program
    parser.error("Not enough arguments.")

  # Refuse to run destructive tests without explicit confirmation
  if not options.yes_do_it:
    print ("Executing this script irreversibly destroys any Ganeti\n"
           "configuration on all nodes involved. If you really want\n"
           "to start testing, supply the --yes-do-it option.")
    sys.exit(1)

  f = open(config_file, 'r')
  try:
    # NOTE(review): yaml.load executes arbitrary YAML tags; fine for a
    # trusted local config, but safe_load would be preferable
    cfg = yaml.load(f.read())
  finally:
    f.close()

  # Config sanity and known_hosts upload always run
  RunTest(TestConfig)

  RunTest(TestUploadKnownHostsFile, known_hosts_file)

  if TestEnabled('env'):
    RunTest(TestSshConnection)
    RunTest(TestIcmpPing)
    RunTest(TestGanetiCommands)

  # Cluster creation and node addition are unconditional
  RunTest(TestClusterInit)

  RunTest(TestNodeAddAll)

  if TestEnabled('cluster-verify'):
    RunTest(TestClusterVerify)

  if TestEnabled('cluster-info'):
    RunTest(TestClusterInfo)

  if TestEnabled('cluster-copyfile'):
    RunTest(TestClusterCopyfile)

  if TestEnabled('node-info'):
    RunTest(TestNodeInfo)

  if TestEnabled('cluster-burnin'):
    RunTest(TestClusterBurnin)

  if TestEnabled('cluster-master-failover'):
    RunTest(TestClusterMasterFailover)

  # Instance tests share one acquired node; released in the finally below
  node = AcquireNode()
  try:
    if TestEnabled('instance-add-plain-disk'):
      instance = RunTest(TestInstanceAddWithPlainDisk, node)
      RunTest(TestInstanceShutdown, instance)
      RunTest(TestInstanceStartup, instance)

      if TestEnabled('instance-info'):
        RunTest(TestInstanceInfo, instance)

      if TestEnabled('instance-automatic-restart'):
        RunTest(TestInstanceAutomaticRestart, node, instance)

      if TestEnabled('instance-consecutive-failures'):
        RunTest(TestInstanceConsecutiveFailures, node, instance)

      if TestEnabled('node-volumes'):
        RunTest(TestNodeVolumes)

      RunTest(TestInstanceRemove, instance)
      del instance

    if TestEnabled('instance-add-local-mirror-disk'):
      instance = RunTest(TestInstanceAddWithLocalMirrorDisk, node)
      RunTest(TestInstanceShutdown, instance)
      RunTest(TestInstanceStartup, instance)

      if TestEnabled('instance-info'):
        RunTest(TestInstanceInfo, instance)

      if TestEnabled('node-volumes'):
        RunTest(TestNodeVolumes)

      RunTest(TestInstanceRemove, instance)
      del instance

    if TestEnabled('instance-add-remote-raid-disk'):
      # remote_raid1 needs a secondary node as well
      node2 = AcquireNode(exclude=node)
      try:
        instance = RunTest(TestInstanceAddWithRemoteRaidDisk, node, node2)
        RunTest(TestInstanceShutdown, instance)
        RunTest(TestInstanceStartup, instance)

        if TestEnabled('instance-info'):
          RunTest(TestInstanceInfo, instance)

        if TestEnabled('instance-failover'):
          RunTest(TestInstanceFailover, instance)

        if TestEnabled('node-volumes'):
          RunTest(TestNodeVolumes)

        RunTest(TestInstanceRemove, instance)
        del instance
      finally:
        ReleaseNode(node2)

  finally:
    ReleaseNode(node)

  # Tear the cluster down again
  RunTest(TestNodeRemoveAll)

  if TestEnabled('cluster-destroy'):
    RunTest(TestClusterDestroy)
789 |
# }}}
|
790 |
|
791 |
# vim: foldmethod=marker :
|