QA for instance migration.
[ganeti-local] / qa / qa_instance.py
1 # Copyright (C) 2007 Google Inc.
2 #
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful, but
9 # WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 # General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 # 02110-1301, USA.
17
18
19 """Instance related QA tests.
20
21 """
22
23 import re
24 import time
25
26 from ganeti import utils
27 from ganeti import constants
28
29 import qa_config
30 import qa_utils
31 import qa_error
32
33 from qa_utils import AssertEqual, AssertNotEqual, StartSSH
34
35
def _GetDiskStatePath(disk):
  """Return the sysfs path of the state file for a block device.

  disk: block device name as it appears below /sys/block (e.g. "sda")

  """
  path_template = "/sys/block/%s/device/state"
  return path_template % disk
38
39
def _GetGenericAddParameters():
  """Build the size options shared by instance add and backup import.

  Each option value is read from the QA configuration ('os-size',
  'swap-size' and 'mem').  Returns a list of command line arguments.

  """
  # (option format, QA configuration key), in command line order
  option_table = [
    ('--os-size=%s', 'os-size'),
    ('--swap-size=%s', 'swap-size'),
    ('--memory=%s', 'mem'),
    ]
  return [fmt % qa_config.get(key) for (fmt, key) in option_table]
44
45
def _DiskTest(node, disk_template):
  """Add an instance using the given disk template.

  An instance is acquired from the QA configuration and created with
  "gnt-instance add" run on the master node.  If anything goes wrong
  while adding it, the instance is released again and the error is
  re-raised.

  node: value for the --node option
  disk_template: value for the --disk-template option

  Returns the acquired instance.

  """
  master = qa_config.GetMasterNode()

  instance = qa_config.AcquireInstance()
  try:
    add_cmd = ['gnt-instance', 'add',
               '--os-type=%s' % qa_config.get('os'),
               '--disk-template=%s' % disk_template,
               '--node=%s' % node]
    add_cmd.extend(_GetGenericAddParameters())
    add_cmd.append(instance['name'])

    exit_code = StartSSH(master['primary'],
                         utils.ShellQuoteArgs(add_cmd)).wait()
    AssertEqual(exit_code, 0)
  except:
    # Hand the instance back before propagating any failure
    qa_config.ReleaseInstance(instance)
    raise

  return instance
64
65
@qa_utils.DefineHook('instance-add-plain-disk')
def TestInstanceAddWithPlainDisk(node):
  """gnt-instance add -t plain

  Adds an instance with the 'plain' disk template on the given node and
  returns it.

  """
  target = node['primary']
  return _DiskTest(target, 'plain')
70
71
@qa_utils.DefineHook('instance-add-local-mirror-disk')
def TestInstanceAddWithLocalMirrorDisk(node):
  """gnt-instance add -t local_raid1

  Adds an instance with the 'local_raid1' disk template on the given
  node and returns it.

  """
  target = node['primary']
  return _DiskTest(target, 'local_raid1')
76
77
@qa_utils.DefineHook('instance-add-remote-raid-disk')
def TestInstanceAddWithRemoteRaidDisk(node, node2):
  """gnt-instance add -t remote_raid1

  Adds an instance with the 'remote_raid1' disk template, passing both
  nodes as "<node>:<node2>" to --node, and returns it.

  """
  node_pair = "%s:%s" % (node['primary'], node2['primary'])
  return _DiskTest(node_pair, 'remote_raid1')
83
84
@qa_utils.DefineHook('instance-add-drbd-disk')
def TestInstanceAddWithDrbdDisk(node, node2):
  """gnt-instance add -t drbd

  Adds an instance with the 'drbd' disk template, passing both nodes as
  "<node>:<node2>" to --node, and returns it.

  """
  node_pair = "%s:%s" % (node['primary'], node2['primary'])
  return _DiskTest(node_pair, 'drbd')
90
91
@qa_utils.DefineHook('instance-grow-disk')
def TestInstanceGrowDisk(instance, should_fail):
  """gnt-instance grow-disk

  Grows both 'sda' and 'sdb' of the instance by the size configured in
  options/grow-disk-size (default '1g').

  instance: instance whose disks are grown
  should_fail: if true each command must exit non-zero, otherwise each
    command must exit with zero

  """
  master = qa_config.GetMasterNode()
  grow_size = qa_config.get('options', {}).get('grow-disk-size', '1g')

  # Pick the exit code check matching the expected outcome
  if should_fail:
    check_exit_code = AssertNotEqual
  else:
    check_exit_code = AssertEqual

  for device in ('sda', 'sdb'):
    grow_cmd = ['gnt-instance', 'grow-disk', instance['name'], device,
                grow_size]
    exit_code = StartSSH(master['primary'],
                         utils.ShellQuoteArgs(grow_cmd)).wait()
    check_exit_code(exit_code, 0)
105
106
@qa_utils.DefineHook('instance-remove')
def TestInstanceRemove(instance):
  """gnt-instance remove

  Removes the instance on the master node and, once that succeeded,
  releases it in the QA configuration.

  """
  master = qa_config.GetMasterNode()

  remove_cmd = ['gnt-instance', 'remove', '-f', instance['name']]
  exit_code = StartSSH(master['primary'],
                       utils.ShellQuoteArgs(remove_cmd)).wait()
  AssertEqual(exit_code, 0)

  qa_config.ReleaseInstance(instance)
117
118
@qa_utils.DefineHook('instance-startup')
def TestInstanceStartup(instance):
  """gnt-instance startup

  Runs the command on the master node and expects exit code zero.

  """
  master = qa_config.GetMasterNode()

  startup_cmd = ['gnt-instance', 'startup', instance['name']]
  exit_code = StartSSH(master['primary'],
                       utils.ShellQuoteArgs(startup_cmd)).wait()
  AssertEqual(exit_code, 0)
127
128
@qa_utils.DefineHook('instance-shutdown')
def TestInstanceShutdown(instance):
  """gnt-instance shutdown

  Runs the command on the master node and expects exit code zero.

  """
  master = qa_config.GetMasterNode()

  shutdown_cmd = ['gnt-instance', 'shutdown', instance['name']]
  exit_code = StartSSH(master['primary'],
                       utils.ShellQuoteArgs(shutdown_cmd)).wait()
  AssertEqual(exit_code, 0)
137
138
@qa_utils.DefineHook('instance-reboot')
def TestInstanceReboot(instance):
  """gnt-instance reboot

  Reboots the instance once for each of the types "soft", "hard" and
  "full"; every reboot must exit with zero.

  """
  master = qa_config.GetMasterNode()

  for reboot_type in ("soft", "hard", "full"):
    type_option = '--type=%s' % reboot_type
    reboot_cmd = ['gnt-instance', 'reboot', type_option, instance['name']]
    exit_code = StartSSH(master['primary'],
                         utils.ShellQuoteArgs(reboot_cmd)).wait()
    AssertEqual(exit_code, 0)
149
150
@qa_utils.DefineHook('instance-reinstall')
def TestInstanceReinstall(instance):
  """gnt-instance reinstall

  Runs the command with -f on the master node and expects exit code
  zero.

  """
  master = qa_config.GetMasterNode()

  reinstall_cmd = ['gnt-instance', 'reinstall', '-f', instance['name']]
  exit_code = StartSSH(master['primary'],
                       utils.ShellQuoteArgs(reinstall_cmd)).wait()
  AssertEqual(exit_code, 0)
159
160
@qa_utils.DefineHook('instance-failover')
def TestInstanceFailover(instance):
  """gnt-instance failover

  Runs the same forced failover twice (there and back again); both runs
  must exit with zero.

  """
  master = qa_config.GetMasterNode()

  # First pass fails over, second pass fails back
  for _ in (0, 1):
    failover_cmd = ['gnt-instance', 'failover', '--force', instance['name']]
    exit_code = StartSSH(master['primary'],
                         utils.ShellQuoteArgs(failover_cmd)).wait()
    AssertEqual(exit_code, 0)
174
175
@qa_utils.DefineHook('instance-migrate')
def TestInstanceMigrate(instance):
  """gnt-instance migrate

  Migrates the instance there and back as many times as configured in
  options/instance-migrations (default 1), then runs the migration
  twice more with --cleanup.  Every command must exit with zero.

  """
  master = qa_config.GetMasterNode()
  migrations = qa_config.get('options', {}).get('instance-migrations', 1)

  def _Migrate(extra_args):
    """Run one forced migrate with the given extra options."""
    migrate_cmd = (['gnt-instance', 'migrate', '-f'] + extra_args +
                   [instance['name']])
    exit_code = StartSSH(master['primary'],
                         utils.ShellQuoteArgs(migrate_cmd)).wait()
    AssertEqual(exit_code, 0)

  for _ in range(migrations):
    _Migrate([])
    # ... and back
    _Migrate([])

  # ...and once with --cleanup
  _Migrate(['--cleanup'])
  # ... and back
  _Migrate(['--cleanup'])
201
202
@qa_utils.DefineHook('instance-info')
def TestInstanceInfo(instance):
  """gnt-instance info

  Runs the command on the master node and expects exit code zero.

  """
  master = qa_config.GetMasterNode()

  info_cmd = ['gnt-instance', 'info', instance['name']]
  exit_code = StartSSH(master['primary'],
                       utils.ShellQuoteArgs(info_cmd)).wait()
  AssertEqual(exit_code, 0)
211
212
@qa_utils.DefineHook('instance-modify')
def TestInstanceModify(instance):
  """gnt-instance modify

  Applies a series of single-option modifications, each followed by one
  that sets the option back to its configured or default value, and
  expects every one to succeed.  Finally checks that calling modify
  without any option exits non-zero.

  """
  master = qa_config.GetMasterNode()

  orig_memory = qa_config.get('mem')
  orig_bridge = qa_config.get('bridge', 'xen-br0')

  # (option, value) pairs, applied in this order
  modifications = [
    ("--memory", "128"),
    ("--memory", str(orig_memory)),
    ("--cpu", "2"),
    ("--cpu", "1"),
    ("--bridge", "xen-br1"),
    ("--bridge", orig_bridge),
    ("--kernel", "/dev/null"),
    ("--kernel", "default"),
    ("--initrd", "/dev/null"),
    ("--initrd", "none"),
    ("--initrd", "default"),
    ("--hvm-boot-order", "acn"),
    ("--hvm-boot-order", "default"),
    ]
  for (option, value) in modifications:
    modify_cmd = ['gnt-instance', 'modify', option, value, instance['name']]
    exit_code = StartSSH(master['primary'],
                         utils.ShellQuoteArgs(modify_cmd)).wait()
    AssertEqual(exit_code, 0)

  # check no-modify
  noop_cmd = ['gnt-instance', 'modify', instance['name']]
  exit_code = StartSSH(master['primary'],
                       utils.ShellQuoteArgs(noop_cmd)).wait()
  AssertNotEqual(exit_code, 0)
244
245
@qa_utils.DefineHook('instance-list')
def TestInstanceList():
  """gnt-instance list

  Runs the command on the master node and expects exit code zero.

  """
  master = qa_config.GetMasterNode()

  list_cmd = ['gnt-instance', 'list']
  exit_code = StartSSH(master['primary'],
                       utils.ShellQuoteArgs(list_cmd)).wait()
  AssertEqual(exit_code, 0)
254
255
@qa_utils.DefineHook('instance-replace-disks')
def TestReplaceDisks(instance, pnode, snode, othernode, is_drbd):
  """gnt-instance replace-disks

  For drbd instances the disks are replaced with -p and then with -s;
  otherwise the command is run once without options.  Afterwards the
  secondary is moved to othernode and then restored to snode.

  instance: instance whose disks are replaced
  pnode: not used by this function
  snode: node the secondary is restored to at the end
  othernode: node used as temporary new secondary
  is_drbd: selects the drbd variant of the test

  """
  master = qa_config.GetMasterNode()

  def _ReplaceDisks(args):
    """Run replace-disks with the given options; must exit with zero."""
    replace_cmd = ['gnt-instance', 'replace-disks']
    replace_cmd.extend(args)
    replace_cmd.append(instance["name"])
    exit_code = StartSSH(master['primary'],
                         utils.ShellQuoteArgs(replace_cmd)).wait()
    AssertEqual(exit_code, 0)

  if is_drbd:
    # drbd
    _ReplaceDisks(["-p"])
    _ReplaceDisks(["-s"])
  else:
    # remote_raid1
    _ReplaceDisks([])

  _ReplaceDisks(["--new-secondary=%s" % othernode["primary"]])

  # Restore
  _ReplaceDisks(["--new-secondary=%s" % snode["primary"]])
290
291
@qa_utils.DefineHook('backup-export')
def TestInstanceExport(instance, node):
  """gnt-backup export

  Exports the instance to the given node and returns the result of
  qa_utils.ResolveInstanceName for the instance.

  """
  master = qa_config.GetMasterNode()

  export_cmd = ['gnt-backup', 'export', '-n', node['primary'],
                instance['name']]
  exit_code = StartSSH(master['primary'],
                       utils.ShellQuoteArgs(export_cmd)).wait()
  AssertEqual(exit_code, 0)

  return qa_utils.ResolveInstanceName(instance)
302
303
@qa_utils.DefineHook('backup-import')
def TestInstanceImport(node, newinst, expnode, name):
  """gnt-backup import

  Imports an export as a new instance using the 'plain' disk template.

  node: node the new instance is created on
  newinst: instance whose name is used for the import
  expnode: node holding the export
  name: name of the export directory below constants.EXPORT_DIR

  """
  master = qa_config.GetMasterNode()

  import_cmd = ['gnt-backup', 'import',
                '--disk-template=plain',
                '--no-ip-check',
                '--src-node=%s' % expnode['primary'],
                '--src-dir=%s/%s' % (constants.EXPORT_DIR, name),
                '--node=%s' % node['primary']]
  import_cmd.extend(_GetGenericAddParameters())
  import_cmd.append(newinst['name'])

  exit_code = StartSSH(master['primary'],
                       utils.ShellQuoteArgs(import_cmd)).wait()
  AssertEqual(exit_code, 0)
319
320
@qa_utils.DefineHook('backup-list')
def TestBackupList(expnode):
  """gnt-backup list

  Lists the exports on the given node; the command must exit with zero.

  """
  master = qa_config.GetMasterNode()

  node_option = '--node=%s' % expnode['primary']
  list_cmd = ['gnt-backup', 'list', node_option]
  exit_code = StartSSH(master['primary'],
                       utils.ShellQuoteArgs(list_cmd)).wait()
  AssertEqual(exit_code, 0)
329
330
def _TestInstanceDiskFailure(instance, node, node2, onmaster):
  """Testing disk failure.

  Sets the physical disks backing the instance to "offline" on one
  node, writes to the instance's devices so the failure gets noticed,
  sets the disks back to "running" and finally restarts the instance
  and runs "gnt-cluster verify".

  instance: instance to test
  node: node on which the device writes and "drbdsetup show" are run
  node2: the other node of the instance
  onmaster: if true the disks on node are taken offline, otherwise the
    disks on node2

  Raises qa_error.Error if a physical volume name has an unexpected
  format or if no physical disks were found on the node to be failed.

  """
  master = qa_config.GetMasterNode()
  sq = utils.ShellQuoteArgs

  instance_full = qa_utils.ResolveInstanceName(instance)
  node_full = qa_utils.ResolveNodeName(node)
  node2_full = qa_utils.ResolveNodeName(node2)

  # Select the node whose disks will be taken offline
  idx = int(onmaster)
  failed_node = [node2, node][idx]
  failed_node_full = [node2_full, node_full][idx]

  cmd = ['gnt-node', 'volumes', '--separator=|', '--no-headers',
         '--output=node,phys,instance',
         node['primary'], node2['primary']]
  output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))

  # Get physical disk names
  re_disk = re.compile(r'^/dev/([a-z]+)\d+$')
  node2disk = {}
  for line in output.splitlines():
    (node_name, phys, inst) = line.split('|')
    if inst == instance_full:
      disks = node2disk.setdefault(node_name, [])

      m = re_disk.match(phys)
      if not m:
        # Report the volume that failed to parse
        raise qa_error.Error("Unknown disk name format: %s" % phys)

      name = m.group(1)
      if name not in disks:
        disks.append(name)

  if failed_node_full not in node2disk:
    raise qa_error.Error("Couldn't find physical disks used on"
                         " %s node" % ["secondary", "master"][idx])

  # Check whether nodes have ability to stop disks
  for node_name, disks in node2disk.iteritems():
    cmds = []
    for disk in disks:
      cmds.append(sq(["test", "-f", _GetDiskStatePath(disk)]))
    AssertEqual(StartSSH(node_name, ' && '.join(cmds)).wait(), 0)

  # Get device paths
  cmd = ['gnt-instance', 'activate-disks', instance['name']]
  output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))
  devpath = []
  for line in output.splitlines():
    (_, _, tmpdevpath) = line.split(':')
    devpath.append(tmpdevpath)

  # Get drbd device paths
  cmd = ['gnt-instance', 'info', instance['name']]
  output = qa_utils.GetCommandOutput(master['primary'], sq(cmd))
  pattern = (r'\s+-\s+type:\s+drbd,\s+.*$'
             r'\s+primary:\s+(/dev/drbd\d+)\s+')
  drbddevs = re.findall(pattern, output, re.M)

  # Disks are recorded here before being stopped so that the finally
  # clause below can set them running again
  halted_disks = []
  try:
    # Deactivate disks
    cmds = []
    for name in node2disk[failed_node_full]:
      halted_disks.append(name)
      cmds.append(sq(["echo", "offline"]) + " >%s" % _GetDiskStatePath(name))
    AssertEqual(StartSSH(failed_node['primary'],
                         ' && '.join(cmds)).wait(), 0)

    # Write something to the disks and give some time to notice the problem
    cmds = []
    for disk in devpath:
      cmds.append(sq(["dd", "count=1", "bs=512", "conv=notrunc",
                      "if=%s" % disk, "of=%s" % disk]))
    for _ in (0, 1, 2):
      AssertEqual(StartSSH(node['primary'], ' && '.join(cmds)).wait(), 0)
      time.sleep(3)

    for name in drbddevs:
      cmd = ['drbdsetup', name, 'show']
      AssertEqual(StartSSH(node['primary'], sq(cmd)).wait(), 0)

    # For manual checks
    cmd = ['gnt-instance', 'info', instance['name']]
    AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  finally:
    # Activate disks again; the commands are joined with ';' so that
    # every disk is attempted even if an earlier echo fails
    cmds = []
    for name in halted_disks:
      cmds.append(sq(["echo", "running"]) + " >%s" % _GetDiskStatePath(name))
    AssertEqual(StartSSH(failed_node['primary'],
                         '; '.join(cmds)).wait(), 0)

  if onmaster:
    for name in drbddevs:
      cmd = ['drbdsetup', name, 'detach']
      AssertEqual(StartSSH(node['primary'], sq(cmd)).wait(), 0)
  else:
    for name in drbddevs:
      cmd = ['drbdsetup', name, 'disconnect']
      AssertEqual(StartSSH(node2['primary'], sq(cmd)).wait(), 0)

  # Make sure disks are up again
  #cmd = ['gnt-instance', 'activate-disks', instance['name']]
  #AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  # Restart instance
  cmd = ['gnt-instance', 'shutdown', instance['name']]
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  #cmd = ['gnt-instance', 'startup', '--force', instance['name']]
  cmd = ['gnt-instance', 'startup', instance['name']]
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)

  cmd = ['gnt-cluster', 'verify']
  AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0)
446
447
def TestInstanceMasterDiskFailure(instance, node, node2):
  """Testing disk failure on master node.

  Currently only prints a notice: the actual test is disabled.  The
  arguments are accepted but not used.

  """
  notice = ("Disk failure on primary node cannot be"
            " tested due to potential crashes.")
  print(qa_utils.FormatError(notice))
  # The following can cause crashes, thus it's disabled until fixed
  #return _TestInstanceDiskFailure(instance, node, node2, True)
454
455
def TestInstanceSecondaryDiskFailure(instance, node, node2):
  """Testing disk failure on secondary node.

  Delegates to _TestInstanceDiskFailure with onmaster set to False and
  returns its result.

  """
  on_master = False
  return _TestInstanceDiskFailure(instance, node, node2, on_master)