67 |
67 |
raise qa_error.Error("xm shutdown failed")
|
68 |
68 |
|
69 |
69 |
|
70 |
|
def _ResetWatcherDaemon(node):
|
|
70 |
def _ResetWatcherDaemon():
|
71 |
71 |
"""Removes the watcher daemon's state file.
|
72 |
72 |
|
73 |
73 |
Args:
|
74 |
74 |
node: Node to be reset
|
75 |
75 |
"""
|
|
76 |
master = qa_config.GetMasterNode()
|
|
77 |
|
76 |
78 |
cmd = ['rm', '-f', constants.WATCHER_STATEFILE]
|
77 |
|
AssertEqual(StartSSH(node['primary'],
|
|
79 |
AssertEqual(StartSSH(master['primary'],
|
|
80 |
utils.ShellQuoteArgs(cmd)).wait(), 0)
|
|
81 |
|
|
82 |
|
|
83 |
def _RunWatcherDaemon():
|
|
84 |
"""Runs the ganeti-watcher daemon on the master node.
|
|
85 |
|
|
86 |
"""
|
|
87 |
master = qa_config.GetMasterNode()
|
|
88 |
|
|
89 |
cmd = ['ganeti-watcher', '-d']
|
|
90 |
AssertEqual(StartSSH(master['primary'],
|
78 |
91 |
utils.ShellQuoteArgs(cmd)).wait(), 0)
|
79 |
92 |
|
80 |
93 |
|
81 |
94 |
def PrintCronWarning():
|
82 |
|
"""Shows a warning about the required cron job.
|
|
95 |
"""Shows a warning about the cron job.
|
83 |
96 |
|
84 |
97 |
"""
|
|
98 |
msg = ("For the following tests it's recommended to turn off the "
|
|
99 |
"ganeti-watcher cronjob.")
|
85 |
100 |
print
|
86 |
|
print qa_utils.FormatWarning("The following tests require the cron script "
|
87 |
|
"for ganeti-watcher to be set up.")
|
|
101 |
print qa_utils.FormatWarning(msg)
|
88 |
102 |
|
89 |
103 |
|
90 |
104 |
def TestInstanceAutomaticRestart(node, instance):
|
91 |
105 |
"""Test automatic restart of instance by ganeti-watcher.
|
92 |
106 |
|
93 |
|
Note: takes up to 6 minutes to complete.
|
94 |
107 |
"""
|
95 |
108 |
master = qa_config.GetMasterNode()
|
96 |
109 |
inst_name = qa_utils.ResolveInstanceName(instance)
|
97 |
110 |
|
98 |
|
_ResetWatcherDaemon(node)
|
|
111 |
_ResetWatcherDaemon()
|
99 |
112 |
_XmShutdownInstance(node, inst_name)
|
100 |
113 |
|
101 |
|
# Give it a bit more than five minutes to start again
|
102 |
|
restart_at = time.time() + 330
|
|
114 |
_RunWatcherDaemon()
|
|
115 |
time.sleep(5)
|
103 |
116 |
|
104 |
|
# Wait until it's running again
|
105 |
|
while time.time() <= restart_at:
|
106 |
|
if _InstanceRunning(node, inst_name):
|
107 |
|
break
|
108 |
|
time.sleep(15)
|
109 |
|
else:
|
110 |
|
raise qa_error.Error("Daemon didn't restart instance in time")
|
|
117 |
if not _InstanceRunning(node, inst_name):
|
|
118 |
raise qa_error.Error("Daemon didn't restart instance")
|
111 |
119 |
|
112 |
120 |
cmd = ['gnt-instance', 'info', inst_name]
|
113 |
121 |
AssertEqual(StartSSH(master['primary'],
|
... | ... | |
117 |
125 |
def TestInstanceConsecutiveFailures(node, instance):
|
118 |
126 |
"""Test five consecutive instance failures.
|
119 |
127 |
|
120 |
|
Note: takes at least 35 minutes to complete.
|
121 |
128 |
"""
|
122 |
129 |
master = qa_config.GetMasterNode()
|
123 |
130 |
inst_name = qa_utils.ResolveInstanceName(instance)
|
124 |
131 |
|
125 |
|
_ResetWatcherDaemon(node)
|
126 |
|
_XmShutdownInstance(node, inst_name)
|
127 |
|
|
128 |
|
# Do shutdowns for 30 minutes
|
129 |
|
finished_at = time.time() + (35 * 60)
|
|
132 |
_ResetWatcherDaemon()
|
130 |
133 |
|
131 |
|
while time.time() <= finished_at:
|
132 |
|
if _InstanceRunning(node, inst_name):
|
133 |
|
_XmShutdownInstance(node, inst_name)
|
134 |
|
time.sleep(30)
|
|
134 |
for should_start in ([True] * 5) + [False]:
|
|
135 |
_XmShutdownInstance(node, inst_name)
|
|
136 |
_RunWatcherDaemon()
|
|
137 |
time.sleep(5)
|
135 |
138 |
|
136 |
|
# Check for some time whether the instance doesn't start again
|
137 |
|
check_until = time.time() + 330
|
138 |
|
while time.time() <= check_until:
|
139 |
|
if _InstanceRunning(node, inst_name):
|
140 |
|
raise qa_error.Error("Instance started when it shouldn't")
|
141 |
|
time.sleep(30)
|
|
139 |
if bool(_InstanceRunning(node, inst_name)) != should_start:
|
|
140 |
if should_start:
|
|
141 |
msg = "Instance not started when it should"
|
|
142 |
else:
|
|
143 |
msg = "Instance started when it shouldn't"
|
|
144 |
raise qa_error.Error(msg)
|
142 |
145 |
|
143 |
146 |
cmd = ['gnt-instance', 'info', inst_name]
|
144 |
147 |
AssertEqual(StartSSH(master['primary'],
|