Revision e8ae0c20 qa/qa_instance.py

b/qa/qa_instance.py
20 20

  
21 21
"""
22 22

  
23
import re
24
import time
25

  
23 26
from ganeti import utils
24 27
from ganeti import constants
25 28

  
26 29
import qa_config
27 30
import qa_utils
31
import qa_error
32

  
33
from qa_utils import AssertEqual, AssertNotEqual, StartSSH
28 34

  
29
from qa_utils import AssertEqual, StartSSH
35

  
36
def _GetDiskStatePath(disk):
37
  return "/sys/block/%s/device/state" % disk
30 38

  
31 39

  
32 40
def _GetGenericAddParameters():
......
172 180
  cmd = ['gnt-backup', 'list', '--nodes=%s' % expnode['primary']]
173 181
  AssertEqual(StartSSH(master['primary'],
174 182
                       utils.ShellQuoteArgs(cmd)).wait(), 0)
183

  
184

  
185
def _TestInstanceDiskFailure(instance, node, node2, onmaster):
  """Testing disk failure.

  Sets the physical disks backing the instance on one node to "offline"
  through sysfs, writes to the instance's disks from C{node}, brings the
  physical disks back to "running" and finally restarts the instance and
  verifies the cluster.

  @param instance: Instance entry; its C{"name"} key is passed to the
      gnt-instance commands
  @param node: Node entry (C{"primary"} key is used as SSH target); the
      writes to the instance's disks are executed on this node
  @param node2: Second node entry (C{"primary"} key is used as SSH target)
  @param onmaster: If true, the disks of C{node} are halted, otherwise the
      disks of C{node2}
  @raise qa_error.Error: If a physical volume name has an unexpected format
      or no physical disks were found on the node to be halted

  """
  master = qa_config.GetMasterNode()
  sq = utils.ShellQuoteArgs

  instance_full = qa_utils.ResolveInstanceName(instance)
  node_full = qa_utils.ResolveNodeName(node)
  node2_full = qa_utils.ResolveNodeName(node2)

  # Decide once which node gets its disks halted instead of repeating the
  # selection at every use
  if onmaster:
    (target, target_full, target_desc) = (node, node_full, "master")
  else:
    (target, target_full, target_desc) = (node2, node2_full, "secondary")

  cmd = ['gnt-node', 'volumes', '--separator=|', '--no-headers',
         '--output=node,phys,instance',
         node['primary'], node2['primary']]
  output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))

  # Get physical disk names
  re_disk = re.compile(r'^/dev/([a-z]+)\d+$')
  node2disk = {}
  for line in output.splitlines():
    (node_name, phys, inst) = line.split('|')
    if inst != instance_full:
      continue

    m = re_disk.match(phys)
    if not m:
      # Report the offending volume; "disk" is not bound at this point
      raise qa_error.Error("Unknown disk name format: %s" % phys)

    # Strip the partition number and record every disk only once per node
    name = m.group(1)
    disks = node2disk.setdefault(node_name, [])
    if name not in disks:
      disks.append(name)

  if target_full not in node2disk:
    raise qa_error.Error("Couldn't find physical disks used on "
                         "%s node" % target_desc)

  # Check whether nodes have ability to stop disks
  for node_name, disks in node2disk.items():
    cmds = [sq(["test", "-f", _GetDiskStatePath(disk)]) for disk in disks]
    AssertEqual(StartSSH(node_name, ' && '.join(cmds)).wait(), 0)

  # Get device paths
  cmd = ['gnt-instance', 'activate-disks', instance['name']]
  output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))
  devpath = []
  for line in output.splitlines():
    # Only the third colon-separated field (the device path) is needed
    (_, _, tmpdevpath) = line.split(':')
    devpath.append(tmpdevpath)

  # Get drbd device paths
  # NOTE(review): the parsed result is not used anywhere below; the query is
  # kept so the set of commands executed by the test stays the same
  cmd = ['gnt-instance', 'info', instance['name']]
  output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))
  pattern = (r'\s+-\s+type:\s+drbd,\s+.*$'
             r'\s+primary:\s+(/dev/drbd\d+)\s+')
  drbddevs = re.findall(pattern, output, re.M)

  # Deactivate disks on the selected node (master or secondary)
  halted_disks = []
  cmds = []
  for name in node2disk[target_full]:
    halted_disks.append(name)
    cmds.append(sq(["echo", "offline"]) + " >%s" % _GetDiskStatePath(name))
  AssertEqual(StartSSH(target['primary'], '; '.join(cmds)).wait(), 0)
  try:
    # Write something to the disks and give some time to notice the problem
    cmds = []
    for disk in devpath:
      # Read the first sector and write it back in place
      cmds.append(sq(["dd", "count=1", "bs=512", "conv=notrunc",
                      "if=%s" % disk, "of=%s" % disk]))
    for _ in (0, 1, 2):
      AssertEqual(StartSSH(node['primary'], ' && '.join(cmds)).wait(), 0)
      time.sleep(3)

    # For manual checks
    cmd = ['gnt-instance', 'info', instance['name']]
    AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  finally:
    # Activate disks again, even if one of the steps above failed
    cmds = []
    for name in halted_disks:
      cmds.append(sq(["echo", "running"]) + " >%s" % _GetDiskStatePath(name))
    AssertEqual(StartSSH(target['primary'], '; '.join(cmds)).wait(), 0)

  # Restart instance
  cmd = ['gnt-instance', 'shutdown', instance['name']]
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  cmd = ['gnt-instance', 'startup', '--force', instance['name']]
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  # Make sure disks are up again
  cmd = ['gnt-instance', 'activate-disks', instance['name']]
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  cmd = ['gnt-cluster', 'verify']
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)
285

  
286

  
287
def TestInstanceMasterDiskFailure(instance, node, node2):
  """Testing disk failure on master node.

  The test is currently disabled: this function only prints an error
  message saying so. The arguments are accepted but not used.

  """
  qa_utils.PrintError("Disk failure on primary node cannot be "
                      "tested due to potential crashes.")
  # The following can cause crashes, thus it's disabled until fixed
  #return _TestInstanceDiskFailure(instance, node, node2, True)
293

  
294

  
295
def TestInstanceSecondaryDiskFailure(instance, node, node2):
  """Testing disk failure on secondary node.

  Runs the generic disk failure test with C{onmaster} set to False, i.e.
  the disks used by the instance on C{node2} are the ones halted.

  """
  return _TestInstanceDiskFailure(instance, node, node2, False)

Also available in: Unified diff