Revision e8ae0c20 qa/qa_instance.py
b/qa/qa_instance.py | ||
---|---|---|
20 | 20 |
|
21 | 21 |
""" |
22 | 22 |
|
23 |
import re |
|
24 |
import time |
|
25 |
|
|
23 | 26 |
from ganeti import utils |
24 | 27 |
from ganeti import constants |
25 | 28 |
|
26 | 29 |
import qa_config |
27 | 30 |
import qa_utils |
31 |
import qa_error |
|
32 |
|
|
33 |
from qa_utils import AssertEqual, AssertNotEqual, StartSSH |
|
28 | 34 |
|
29 |
from qa_utils import AssertEqual, StartSSH |
|
35 |
|
|
36 |
def _GetDiskStatePath(disk): |
|
37 |
return "/sys/block/%s/device/state" % disk |
|
30 | 38 |
|
31 | 39 |
|
32 | 40 |
def _GetGenericAddParameters(): |
... | ... | |
172 | 180 |
cmd = ['gnt-backup', 'list', '--nodes=%s' % expnode['primary']] |
173 | 181 |
AssertEqual(StartSSH(master['primary'], |
174 | 182 |
utils.ShellQuoteArgs(cmd)).wait(), 0) |
183 |
|
|
184 |
|
|
185 |
def _TestInstanceDiskFailure(instance, node, node2, onmaster):
  """Testing disk failure.

  Sets the physical disks backing the instance to "offline" on one node,
  writes to the instance's devices a few times so the failure gets noticed,
  then sets the disks back to "running", restarts the instance, reactivates
  its disks and runs a cluster verify.

  Arguments:
    instance: instance entry; instance['name'] is passed to gnt-instance
    node: node entry; node['primary'] is where the dd writes are run
    node2: the other node entry used by the instance
    onmaster: if true the disks on "node" are taken offline, otherwise the
      disks on "node2"

  Raises:
    qa_error.Error: if a physical volume name does not look like
      /dev/<letters><digits>, or if no physical disks used by the instance
      were found on the node selected for the failure

  """
  master = qa_config.GetMasterNode()
  sq = utils.ShellQuoteArgs

  instance_full = qa_utils.ResolveInstanceName(instance)
  node_full = qa_utils.ResolveNodeName(node)
  node2_full = qa_utils.ResolveNodeName(node2)

  # Select once which node gets its disks halted (index 0: node2, 1: node)
  idx = int(onmaster)
  target_full = [node2_full, node_full][idx]
  target_node = [node2, node][idx]

  cmd = ['gnt-node', 'volumes', '--separator=|', '--no-headers',
         '--output=node,phys,instance',
         node['primary'], node2['primary']]
  output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))

  # Get physical disk names
  re_disk = re.compile(r'^/dev/([a-z]+)\d+$')
  node2disk = {}
  for line in output.splitlines():
    (node_name, phys, inst) = line.split('|')
    if inst != instance_full:
      continue

    m = re_disk.match(phys)
    if not m:
      # Report the offending volume name; the previous code referenced the
      # not yet bound name "disk" here and failed with a NameError instead
      raise qa_error.Error("Unknown disk name format: %s" % phys)

    name = m.group(1)
    disks = node2disk.setdefault(node_name, [])
    if name not in disks:
      disks.append(name)

  if target_full not in node2disk:
    raise qa_error.Error("Couldn't find physical disks used on "
                         "%s node" % ["secondary", "master"][idx])

  # Check whether nodes have ability to stop disks
  for node_name, disks in node2disk.items():
    cmds = [sq(["test", "-f", _GetDiskStatePath(disk)]) for disk in disks]
    AssertEqual(StartSSH(node_name, ' && '.join(cmds)).wait(), 0)

  # Get device paths
  cmd = ['gnt-instance', 'activate-disks', instance['name']]
  output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))
  devpath = []
  for line in output.splitlines():
    # Each line has three colon-separated fields; the last is the device path
    (_, _, tmpdevpath) = line.split(':')
    devpath.append(tmpdevpath)

  # Get drbd device paths
  # NOTE(review): drbddevs is not used further down; the lookup is kept so
  # the set of commands run by this test stays unchanged.
  cmd = ['gnt-instance', 'info', instance['name']]
  output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))
  pattern = (r'\s+-\s+type:\s+drbd,\s+.*$'
             r'\s+primary:\s+(/dev/drbd\d+)\s+')
  drbddevs = re.findall(pattern, output, re.M)

  # Deactivate disks on the selected node
  halted_disks = []
  cmds = []
  for name in node2disk[target_full]:
    # Recorded before the command runs so the finally clause below always
    # tries to bring every disk back
    halted_disks.append(name)
    cmds.append(sq(["echo", "offline"]) + " >%s" % _GetDiskStatePath(name))
  AssertEqual(StartSSH(target_node['primary'],
                       '; '.join(cmds)).wait(), 0)
  try:
    # Write something to the disks and give some time to notice the problem
    cmds = []
    for disk in devpath:
      # Reads the first 512-byte block and writes it back in place
      cmds.append(sq(["dd", "count=1", "bs=512", "conv=notrunc",
                      "if=%s" % disk, "of=%s" % disk]))
    for _ in (0, 1, 2):
      AssertEqual(StartSSH(node['primary'], ' && '.join(cmds)).wait(), 0)
      time.sleep(3)

    # For manual checks
    cmd = ['gnt-instance', 'info', instance['name']]
    AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  finally:
    # Activate disks again
    cmds = []
    for name in halted_disks:
      cmds.append(sq(["echo", "running"]) + " >%s" % _GetDiskStatePath(name))
    AssertEqual(StartSSH(target_node['primary'],
                         '; '.join(cmds)).wait(), 0)

  # Restart instance
  cmd = ['gnt-instance', 'shutdown', instance['name']]
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  cmd = ['gnt-instance', 'startup', '--force', instance['name']]
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  # Make sure disks are up again
  cmd = ['gnt-instance', 'activate-disks', instance['name']]
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  cmd = ['gnt-cluster', 'verify']
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)
|
285 |
|
|
286 |
|
|
287 |
def TestInstanceMasterDiskFailure(instance, node, node2):
  """Testing disk failure on master node.

  Currently this only prints a notice; the arguments are not used.

  """
  # The following can cause crashes, thus it's disabled until fixed
  #return _TestInstanceDiskFailure(instance, node, node2, True)
  notice = ("Disk failure on primary node cannot be "
            "tested due to potential crashes.")
  qa_utils.PrintError(notice)
|
293 |
|
|
294 |
|
|
295 |
def TestInstanceSecondaryDiskFailure(instance, node, node2):
  """Testing disk failure on secondary node.

  Delegates to _TestInstanceDiskFailure with onmaster set to False, which
  makes that helper halt the disks on node2.

  """
  result = _TestInstanceDiskFailure(instance, node, node2, False)
  return result
Also available in: Unified diff