20 |
20 |
|
21 |
21 |
"""
|
22 |
22 |
|
|
23 |
import re
|
|
24 |
import time
|
|
25 |
|
23 |
26 |
from ganeti import utils
|
24 |
27 |
from ganeti import constants
|
25 |
28 |
|
26 |
29 |
import qa_config
|
27 |
30 |
import qa_utils
|
|
31 |
import qa_error
|
|
32 |
|
|
33 |
from qa_utils import AssertEqual, AssertNotEqual, StartSSH
|
28 |
34 |
|
29 |
|
from qa_utils import AssertEqual, StartSSH
|
|
35 |
|
|
36 |
def _GetDiskStatePath(disk):
|
|
37 |
return "/sys/block/%s/device/state" % disk
|
30 |
38 |
|
31 |
39 |
|
32 |
40 |
def _GetGenericAddParameters():
|
... | ... | |
172 |
180 |
cmd = ['gnt-backup', 'list', '--nodes=%s' % expnode['primary']]
|
173 |
181 |
AssertEqual(StartSSH(master['primary'],
|
174 |
182 |
utils.ShellQuoteArgs(cmd)).wait(), 0)
|
|
183 |
|
|
184 |
|
|
185 |
def _TestInstanceDiskFailure(instance, node, node2, onmaster):
    """Testing disk failure.

    Sets the physical disks backing the instance to "offline" on one node,
    writes to the instance's devices from ``node`` so the failure is
    noticed, sets the disks back to "running", then restarts the instance,
    re-activates its disks and runs ``gnt-cluster verify``.

    instance: mapping with a 'name' entry identifying the instance
    node: mapping with a 'primary' entry; the dd writes are run on this node
    node2: mapping with a 'primary' entry for the other node
    onmaster: used as ``int(onmaster)``; 1 halts the disks on ``node``,
        0 halts them on ``node2``

    Raises qa_error.Error if a physical volume name does not look like
    ``/dev/<letters><digits>`` or if no physical disks of the instance are
    found on the node that is supposed to fail.
    """
    master = qa_config.GetMasterNode()
    sq = utils.ShellQuoteArgs

    instance_full = qa_utils.ResolveInstanceName(instance)
    node_full = qa_utils.ResolveNodeName(node)
    node2_full = qa_utils.ResolveNodeName(node2)

    # Select the node whose disks get halted once, instead of repeating the
    # [secondary, master][int(onmaster)] lookup at every use.
    (fail_node, fail_node_full, fail_desc) = [
        (node2, node2_full, "secondary"),
        (node, node_full, "master"),
    ][int(onmaster)]

    cmd = ['gnt-node', 'volumes', '--separator=|', '--no-headers',
           '--output=node,phys,instance',
           node['primary'], node2['primary']]
    output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))

    # Get physical disk names: map node name -> list of distinct disk names
    # (partition number stripped) that hold volumes of this instance.
    re_disk = re.compile(r'^/dev/([a-z]+)\d+$')
    node2disk = {}
    for line in output.splitlines():
        (node_name, phys, inst) = line.split('|')
        if inst != instance_full:
            continue

        m = re_disk.match(phys)
        if not m:
            # Report the value that failed to parse. The previous code
            # formatted an unrelated, not-yet-bound local here, which raised
            # UnboundLocalError instead of the intended error.
            raise qa_error.Error("Unknown disk name format: %s" % phys)

        known = node2disk.setdefault(node_name, [])
        name = m.group(1)
        if name not in known:
            known.append(name)

    if fail_node_full not in node2disk:
        raise qa_error.Error("Couldn't find physical disks used on "
                             "%s node" % fail_desc)

    # Check whether nodes have ability to stop disks
    for node_name, disks in node2disk.items():
        cmds = [sq(["test", "-f", _GetDiskStatePath(disk)])
                for disk in disks]
        AssertEqual(StartSSH(node_name, ' && '.join(cmds)).wait(), 0)

    # Get device paths (third ':'-separated field of each output line)
    cmd = ['gnt-instance', 'activate-disks', instance['name']]
    output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))
    devpath = []
    for line in output.splitlines():
        (_, _, tmpdevpath) = line.split(':')
        devpath.append(tmpdevpath)

    # Get drbd device paths
    # NOTE(review): drbddevs is not used anywhere below; the command is kept
    # so the sequence of remote calls stays the same -- confirm whether this
    # section can be dropped.
    cmd = ['gnt-instance', 'info', instance['name']]
    output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))
    pattern = (r'\s+-\s+type:\s+drbd,\s+.*$'
               r'\s+primary:\s+(/dev/drbd\d+)\s+')
    drbddevs = re.findall(pattern, output, re.M)

    halted_disks = list(node2disk[fail_node_full])
    try:
        # Deactivate disks on the selected node. This happens inside the try
        # block so the disks are switched back to "running" even when the
        # assertion on this step fails.
        cmds = [sq(["echo", "offline"]) + " >%s" % _GetDiskStatePath(name)
                for name in halted_disks]
        AssertEqual(StartSSH(fail_node['primary'],
                             '; '.join(cmds)).wait(), 0)

        # Write something to the disks and give some time to notice the
        # problem
        cmds = [sq(["dd", "count=1", "bs=512", "conv=notrunc",
                    "if=%s" % disk, "of=%s" % disk])
                for disk in devpath]
        for _ in range(3):
            AssertEqual(StartSSH(node['primary'],
                                 ' && '.join(cmds)).wait(), 0)
            time.sleep(3)

        # For manual checks
        cmd = ['gnt-instance', 'info', instance['name']]
        AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

    finally:
        # Activate disks again
        cmds = [sq(["echo", "running"]) + " >%s" % _GetDiskStatePath(name)
                for name in halted_disks]
        AssertEqual(StartSSH(fail_node['primary'],
                             '; '.join(cmds)).wait(), 0)

    # Restart instance
    cmd = ['gnt-instance', 'shutdown', instance['name']]
    AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

    cmd = ['gnt-instance', 'startup', '--force', instance['name']]
    AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

    # Make sure disks are up again
    cmd = ['gnt-instance', 'activate-disks', instance['name']]
    AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

    cmd = ['gnt-cluster', 'verify']
    AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)
|
|
285 |
|
|
286 |
|
|
287 |
def TestInstanceMasterDiskFailure(instance, node, node2):
    """Testing disk failure on master node.

    Currently only prints a notice; none of the arguments are used.
    """
    notice = ("Disk failure on primary node cannot be "
              "tested due to potential crashes.")
    qa_utils.PrintError(notice)
    # The real check can cause crashes, so it stays disabled until that is
    # fixed. Once it is, this function should instead do:
    #   return _TestInstanceDiskFailure(instance, node, node2, True)
|
|
293 |
|
|
294 |
|
|
295 |
def TestInstanceSecondaryDiskFailure(instance, node, node2):
    """Testing disk failure on secondary node.

    Delegates to _TestInstanceDiskFailure with ``onmaster`` set to False and
    returns its result.
    """
    on_master = False
    return _TestInstanceDiskFailure(instance, node, node2, on_master)
|