-#!/usr/bin/python
+#!/usr/bin/python -u
#
-# Copyright (C) 2006, 2007 Google Inc.
+# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# 02110-1301, USA.
-"""Script for doing Q&A on Ganeti
+"""Script for doing QA on Ganeti.
-You can create the required known_hosts file using ssh-keyscan. It's mandatory
-to use the full name of a node (FQDN). For security reasons, verify the keys
-before using them.
-Example: ssh-keyscan -t rsa node{1,2,3,4}.example.com > known_hosts
"""
-import os
-import re
-import sys
-import yaml
-import time
-import tempfile
-from datetime import datetime
-from optparse import OptionParser
+# pylint: disable=C0103
+# due to invalid module name (the script's filename is not a valid identifier)
-# I want more flexibility for testing over SSH, therefore I'm not using
-# Ganeti's ssh module.
-import subprocess
+import sys
+import datetime
+import optparse
+
+import qa_cluster
+import qa_config
+import qa_daemon
+import qa_env
+import qa_error
+import qa_group
+import qa_instance
+import qa_node
+import qa_os
+import qa_job
+import qa_rapi
+import qa_tags
+import qa_utils
from ganeti import utils
+from ganeti import rapi # pylint: disable=W0611
from ganeti import constants
-# {{{ Global variables
-cfg = None
-options = None
-# }}}
-
-# {{{ Errors
-class Error(Exception):
- """An error occurred during Q&A testing.
-
- """
- pass
-
-
-class OutOfNodesError(Error):
- """Out of nodes.
-
- """
- pass
-
-
-class OutOfInstancesError(Error):
- """Out of instances.
-
- """
- pass
-# }}}
-
-# {{{ Utilities
-def TestEnabled(test):
- """Returns True if the given test is enabled."""
- return cfg.get('tests', {}).get(test, False)
-
-
-def RunTest(callable, *args):
- """Runs a test after printing a header.
-
- """
- if callable.__doc__:
- desc = callable.__doc__.splitlines()[0].strip()
- else:
- desc = '%r' % callable
-
- now = str(datetime.now())
-
- print
- print '---', now, ('-' * (55 - len(now)))
- print desc
- print '-' * 60
-
- return callable(*args)
+import ganeti.rapi.client # pylint: disable=W0611
+from ganeti.rapi.client import UsesRapiClient
-def AssertEqual(first, second, msg=None):
- """Raises an error when values aren't equal.
+def _FormatHeader(line, end=72):
+ """Fill a line up to the end column.
"""
- if not first == second:
- raise Error(msg or '%r == %r' % (first, second))
+ line = "---- " + line + " "
+ line += "-" * (end - len(line))
+ line = line.rstrip()
+ return line
-def GetSSHCommand(node, cmd, strict=True):
- """Builds SSH command to be executed.
+def _DescriptionOf(fn):
+ """Computes the description of an item.
"""
- args = [ 'ssh', '-oEscapeChar=none', '-oBatchMode=yes', '-l', 'root' ]
-
- if strict:
- tmp = 'yes'
- else:
- tmp = 'no'
- args.append('-oStrictHostKeyChecking=%s' % tmp)
- args.append('-oClearAllForwardings=yes')
- args.append('-oForwardAgent=yes')
- args.append(node)
-
- if options.dry_run:
- prefix = 'exit 0; '
+ if fn.__doc__:
+ desc = fn.__doc__.splitlines()[0].strip()
else:
- prefix = ''
-
- args.append(prefix + cmd)
+ desc = "%r" % fn
- print 'SSH:', utils.ShellQuoteArgs(args)
+ return desc.rstrip(".")
- return args
-
-def StartSSH(node, cmd, strict=True):
- """Starts SSH.
+def RunTest(fn, *args, **kwargs):
+ """Runs a test after printing a header.
"""
- args = GetSSHCommand(node, cmd, strict=strict)
- return subprocess.Popen(args, shell=False)
+ tstart = datetime.datetime.now()
-def UploadFile(node, file):
- """Uploads a file to a node and returns the filename.
+ desc = _DescriptionOf(fn)
- Caller needs to remove the returned file on the node when it's not needed
- anymore.
- """
- # Make sure nobody else has access to it while preserving local permissions
- mode = os.stat(file).st_mode & 0700
-
- cmd = ('tmp=$(tempfile --mode %o --prefix gnt) && '
- '[[ -f "${tmp}" ]] && '
- 'cat > "${tmp}" && '
- 'echo "${tmp}"') % mode
+ print
+ print _FormatHeader("%s start %s" % (tstart, desc))
- f = open(file, 'r')
try:
- p = subprocess.Popen(GetSSHCommand(node, cmd), shell=False, stdin=f,
- stdout=subprocess.PIPE)
- AssertEqual(p.wait(), 0)
-
- # Return temporary filename
- return p.stdout.read().strip()
+ retval = fn(*args, **kwargs)
+ return retval
finally:
- f.close()
-# }}}
+ tstop = datetime.datetime.now()
+ tdelta = tstop - tstart
+ print _FormatHeader("%s time=%s %s" % (tstop, tdelta, desc))
-# {{{ Config helpers
-def GetMasterNode():
- return cfg['nodes'][0]
+def RunTestIf(testnames, fn, *args, **kwargs):
+ """Runs a test conditionally.
-def AcquireInstance():
- """Returns an instance which isn't in use.
+ @param testnames: either a single test name in the configuration
+ file, or a list of testnames (which will be AND-ed together)
"""
- # Filter out unwanted instances
- tmp_flt = lambda inst: not inst.get('_used', False)
- instances = filter(tmp_flt, cfg['instances'])
- del tmp_flt
+ if qa_config.TestEnabled(testnames):
+ RunTest(fn, *args, **kwargs)
+ else:
+ tstart = datetime.datetime.now()
+ desc = _DescriptionOf(fn)
+ print _FormatHeader("%s skipping %s, test(s) %s disabled" %
+ (tstart, desc, testnames))
- if len(instances) == 0:
- raise OutOfInstancesError("No instances left")
- inst = instances[0]
- inst['_used'] = True
- return inst
+def RunEnvTests():
+ """Run several environment tests.
+ """
+ RunTestIf("env", qa_env.TestSshConnection)
+ RunTestIf("env", qa_env.TestIcmpPing)
+ RunTestIf("env", qa_env.TestGanetiCommands)
-def ReleaseInstance(inst):
- inst['_used'] = False
+def SetupCluster(rapi_user, rapi_secret):
+ """Initializes the cluster.
-def AcquireNode(exclude=None):
- """Returns the least used node.
+ @param rapi_user: Login user for RAPI
+ @param rapi_secret: Login secret for RAPI
"""
- master = GetMasterNode()
+ RunTestIf("create-cluster", qa_cluster.TestClusterInit,
+ rapi_user, rapi_secret)
- # Filter out unwanted nodes
- # TODO: Maybe combine filters
- if exclude is None:
- nodes = cfg['nodes'][:]
- else:
- nodes = filter(lambda node: node != exclude, cfg['nodes'])
+ # Test on empty cluster
+ RunTestIf("node-list", qa_node.TestNodeList)
+ RunTestIf("instance-list", qa_instance.TestInstanceList)
+ RunTestIf("job-list", qa_job.TestJobList)
- tmp_flt = lambda node: node.get('_added', False) or node == master
- nodes = filter(tmp_flt, nodes)
- del tmp_flt
+ RunTestIf("create-cluster", qa_node.TestNodeAddAll)
+ if not qa_config.TestEnabled("create-cluster"):
+ # assume the nodes are already part of the cluster
+ qa_node.MarkNodeAddedAll()
- if len(nodes) == 0:
- raise OutOfNodesError("No nodes left")
+ RunTestIf("test-jobqueue", qa_cluster.TestJobqueue)
- # Get node with least number of uses
- def compare(a, b):
- result = cmp(a.get('_count', 0), b.get('_count', 0))
- if result == 0:
- result = cmp(a['primary'], b['primary'])
- return result
+ # enable the watcher (unconditionally)
+ RunTest(qa_daemon.TestResumeWatcher)
- nodes.sort(cmp=compare)
+ RunTestIf("node-list", qa_node.TestNodeList)
- node = nodes[0]
- node['_count'] = node.get('_count', 0) + 1
- return node
+ # Test listing fields
+ RunTestIf("node-list", qa_node.TestNodeListFields)
+ RunTestIf("instance-list", qa_instance.TestInstanceListFields)
+ RunTestIf("job-list", qa_job.TestJobListFields)
+ RunTestIf("instance-export", qa_instance.TestBackupListFields)
+ RunTestIf("node-info", qa_node.TestNodeInfo)
-def ReleaseNode(node):
- node['_count'] = node.get('_count', 0) - 1
-# }}}
-# {{{ Environment tests
-def TestConfig():
- """Test configuration for sanity.
+def RunClusterTests():
+ """Runs tests related to gnt-cluster.
"""
- if len(cfg['nodes']) < 1:
- raise Error("Need at least one node")
- if len(cfg['instances']) < 1:
- raise Error("Need at least one instance")
- # TODO: Add more checks
-
-
-def TestSshConnection():
- """Test SSH connection.
+ for test, fn in [
+ ("create-cluster", qa_cluster.TestClusterInitDisk),
+ ("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto),
+ ("cluster-verify", qa_cluster.TestClusterVerify),
+ ("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs),
+ # TODO: add more cluster modify tests
+ ("cluster-modify", qa_cluster.TestClusterModifyEmpty),
+ ("cluster-modify", qa_cluster.TestClusterModifyBe),
+ ("cluster-modify", qa_cluster.TestClusterModifyDisk),
+ ("cluster-rename", qa_cluster.TestClusterRename),
+ ("cluster-info", qa_cluster.TestClusterVersion),
+ ("cluster-info", qa_cluster.TestClusterInfo),
+ ("cluster-info", qa_cluster.TestClusterGetmaster),
+ ("cluster-redist-conf", qa_cluster.TestClusterRedistConf),
+ ("cluster-copyfile", qa_cluster.TestClusterCopyfile),
+ ("cluster-command", qa_cluster.TestClusterCommand),
+ ("cluster-burnin", qa_cluster.TestClusterBurnin),
+ ("cluster-master-failover", qa_cluster.TestClusterMasterFailover),
+ ("cluster-master-failover",
+ qa_cluster.TestClusterMasterFailoverWithDrainedQueue),
+ ("cluster-oob", qa_cluster.TestClusterOob),
+ ("rapi", qa_rapi.TestVersion),
+ ("rapi", qa_rapi.TestEmptyCluster),
+ ("rapi", qa_rapi.TestRapiQuery),
+ ]:
+ RunTestIf(test, fn)
+
+
+def RunRepairDiskSizes():
+ """Run the repair disk-sizes test.
"""
- for node in cfg['nodes']:
- AssertEqual(StartSSH(node['primary'], 'exit').wait(), 0)
+ RunTestIf("cluster-repair-disk-sizes", qa_cluster.TestClusterRepairDiskSizes)
-def TestGanetiCommands():
- """Test availibility of Ganeti commands.
+def RunOsTests():
+ """Runs all tests related to gnt-os.
"""
- cmds = ( ['gnt-cluster', '--version'],
- ['gnt-os', '--version'],
- ['gnt-node', '--version'],
- ['gnt-instance', '--version'],
- ['gnt-backup', '--version'],
- ['ganeti-noded', '--version'],
- ['ganeti-watcher', '--version'] )
-
- cmd = ' && '.join([utils.ShellQuoteArgs(i) for i in cmds])
-
- for node in cfg['nodes']:
- AssertEqual(StartSSH(node['primary'], cmd).wait(), 0)
-
-
-def TestIcmpPing():
- """ICMP ping each node.
-
- """
- for node in cfg['nodes']:
- check = []
- for i in cfg['nodes']:
- check.append(i['primary'])
- if i.has_key('secondary'):
- check.append(i['secondary'])
-
- ping = lambda ip: utils.ShellQuoteArgs(['ping', '-w', '3', '-c', '1', ip])
- cmd = ' && '.join([ping(i) for i in check])
-
- AssertEqual(StartSSH(node['primary'], cmd).wait(), 0)
-# }}}
-
-# {{{ Cluster tests
-def TestClusterInit():
- """gnt-cluster init"""
- master = GetMasterNode()
-
- cmd = ['gnt-cluster', 'init']
- if master.get('secondary', None):
- cmd.append('--secondary-ip=%s' % master['secondary'])
- if cfg.get('bridge', None):
- cmd.append('--bridge=%s' % cfg['bridge'])
- cmd.append('--master-netdev=%s' % cfg['bridge'])
- cmd.append(cfg['name'])
-
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
-
-
-def TestClusterVerify():
- """gnt-cluster verify"""
- cmd = ['gnt-cluster', 'verify']
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ if qa_config.TestEnabled("rapi"):
+ rapi_getos = qa_rapi.GetOperatingSystems
+ else:
+ rapi_getos = None
+ for fn in [
+ qa_os.TestOsList,
+ qa_os.TestOsDiagnose,
+ ]:
+ RunTestIf("os", fn)
-def TestClusterInfo():
- """gnt-cluster info"""
- cmd = ['gnt-cluster', 'info']
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ for fn in [
+ qa_os.TestOsValid,
+ qa_os.TestOsInvalid,
+ qa_os.TestOsPartiallyValid,
+ ]:
+ RunTestIf("os", fn, rapi_getos)
+ for fn in [
+ qa_os.TestOsModifyValid,
+ qa_os.TestOsModifyInvalid,
+ qa_os.TestOsStatesNonExisting,
+ ]:
+ RunTestIf("os", fn)
-def TestClusterBurnin():
- """Burnin"""
- master = GetMasterNode()
- # Get as many instances as we need
- instances = []
- try:
- for _ in xrange(0, cfg.get('options', {}).get('burnin-instances', 1)):
- instances.append(AcquireInstance())
- except OutOfInstancesError:
- print "Not enough instances, continuing anyway."
+def RunCommonInstanceTests(instance):
+ """Runs a few tests that are common to all disk types.
- if len(instances) < 1:
- raise Error("Burnin needs at least one instance")
-
- # Run burnin
- try:
- script = UploadFile(master['primary'], '../tools/burnin')
+ """
+ RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance)
+ RunTestIf(["instance-shutdown", "instance-console", "rapi"],
+ qa_rapi.TestRapiStoppedInstanceConsole, instance)
+ RunTestIf(["instance-shutdown", "instance-modify"],
+ qa_instance.TestInstanceStoppedModify, instance)
+ RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)
+
+ # Test shutdown/start via RAPI
+ RunTestIf(["instance-shutdown", "rapi"],
+ qa_rapi.TestRapiInstanceShutdown, instance)
+ RunTestIf(["instance-shutdown", "rapi"],
+ qa_rapi.TestRapiInstanceStartup, instance)
+
+ RunTestIf("instance-list", qa_instance.TestInstanceList)
+
+ RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)
+
+ RunTestIf("instance-modify", qa_instance.TestInstanceModify, instance)
+ RunTestIf(["instance-modify", "rapi"],
+ qa_rapi.TestRapiInstanceModify, instance)
+
+ RunTestIf("instance-console", qa_instance.TestInstanceConsole, instance)
+ RunTestIf(["instance-console", "rapi"],
+ qa_rapi.TestRapiInstanceConsole, instance)
+
+ DOWN_TESTS = qa_config.Either([
+ "instance-reinstall",
+ "instance-rename",
+ "instance-grow-disk",
+ ])
+
+ # shutdown instance for any 'down' tests
+ RunTestIf(DOWN_TESTS, qa_instance.TestInstanceShutdown, instance)
+
+ # now run the 'down' state tests
+ RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
+ RunTestIf(["instance-reinstall", "rapi"],
+ qa_rapi.TestRapiInstanceReinstall, instance)
+
+ if qa_config.TestEnabled("instance-rename"):
+ tgt_instance = qa_config.AcquireInstance()
try:
- cmd = [script,
- '--os=%s' % cfg['os'],
- '--os-size=%s' % cfg['os-size'],
- '--swap-size=%s' % cfg['swap-size']]
- cmd += [inst['name'] for inst in instances]
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ rename_source = instance["name"]
+ rename_target = tgt_instance["name"]
+ # perform instance rename to the same name
+ RunTest(qa_instance.TestInstanceRenameAndBack,
+ rename_source, rename_source)
+ RunTestIf("rapi", qa_rapi.TestRapiInstanceRenameAndBack,
+ rename_source, rename_source)
+ if rename_target is not None:
+ # perform instance rename to a different name, if we have one configured
+ RunTest(qa_instance.TestInstanceRenameAndBack,
+ rename_source, rename_target)
+ RunTestIf("rapi", qa_rapi.TestRapiInstanceRenameAndBack,
+ rename_source, rename_target)
finally:
- cmd = ['rm', '-f', script]
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
- finally:
- for inst in instances:
- ReleaseInstance(inst)
+ qa_config.ReleaseInstance(tgt_instance)
+ RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)
-def TestClusterMasterFailover():
- """gnt-cluster masterfailover"""
- master = GetMasterNode()
+ # and now start the instance again
+ RunTestIf(DOWN_TESTS, qa_instance.TestInstanceStartup, instance)
- failovermaster = AcquireNode(exclude=master)
- try:
- cmd = ['gnt-cluster', 'masterfailover']
- AssertEqual(StartSSH(failovermaster['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
-
- cmd = ['gnt-cluster', 'masterfailover']
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
- finally:
- ReleaseNode(failovermaster)
-
-
-def TestClusterCopyfile():
- """gnt-cluster copyfile"""
- master = GetMasterNode()
-
- # Create temporary file
- f = tempfile.NamedTemporaryFile()
- f.write("I'm a testfile.\n")
- f.flush()
- f.seek(0)
-
- # Upload file to master node
- testname = UploadFile(master['primary'], f.name)
- try:
- # Copy file to all nodes
- cmd = ['gnt-cluster', 'copyfile', testname]
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
- finally:
- # Remove file from all nodes
- for node in cfg['nodes']:
- cmd = ['rm', '-f', testname]
- AssertEqual(StartSSH(node['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)
+ RunTestIf("tags", qa_tags.TestInstanceTags, instance)
-def TestClusterDestroy():
- """gnt-cluster destroy"""
- cmd = ['gnt-cluster', 'destroy', '--yes-do-it']
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
-# }}}
+ RunTestIf("cluster-verify", qa_cluster.TestClusterVerify)
-# {{{ Node tests
-def _NodeAdd(node):
- if node.get('_added', False):
- raise Error("Node %s already in cluster" % node['primary'])
+ RunTestIf("rapi", qa_rapi.TestInstance, instance)
- cmd = ['gnt-node', 'add']
- if node.get('secondary', None):
- cmd.append('--secondary-ip=%s' % node['secondary'])
- cmd.append(node['primary'])
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ # Lists instances, too
+ RunTestIf("node-list", qa_node.TestNodeList)
- node['_added'] = True
+ # Some jobs have been run, let's test listing them
+ RunTestIf("job-list", qa_job.TestJobList)
-def TestNodeAddAll():
- """Adding all nodes to cluster."""
- master = GetMasterNode()
- for node in cfg['nodes']:
- if node != master:
- _NodeAdd(node)
+def RunCommonNodeTests():
+ """Run a few common node tests.
-
-def _NodeRemove(node):
- cmd = ['gnt-node', 'remove', node['primary']]
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
- node['_added'] = False
-
-
-def TestNodeRemoveAll():
- """Removing all nodes from cluster."""
- master = GetMasterNode()
- for node in cfg['nodes']:
- if node != master:
- _NodeRemove(node)
-
-
-def TestNodeInfo():
- """gnt-node info"""
- cmd = ['gnt-node', 'info']
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ """
+ RunTestIf("node-volumes", qa_node.TestNodeVolumes)
+ RunTestIf("node-storage", qa_node.TestNodeStorage)
+ RunTestIf("node-oob", qa_node.TestOutOfBand)
-def TestNodeVolumes():
- """gnt-node volumes"""
- cmd = ['gnt-node', 'volumes']
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
-# }}}
+def RunGroupListTests():
+ """Run tests for listing node groups.
-# {{{ Instance tests
-def _DiskTest(node, instance, args):
- cmd = ['gnt-instance', 'add',
- '--os-type=%s' % cfg['os'],
- '--os-size=%s' % cfg['os-size'],
- '--swap-size=%s' % cfg['swap-size'],
- '--memory=%s' % cfg['mem'],
- '--node=%s' % node['primary']]
- if args:
- cmd += args
- cmd.append(instance['name'])
+ """
+ RunTestIf("group-list", qa_group.TestGroupList)
+ RunTestIf("group-list", qa_group.TestGroupListFields)
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
- return instance
+def RunGroupRwTests():
+ """Run tests for adding/removing/renaming groups.
-def TestInstanceAddWithPlainDisk(node):
- """gnt-instance add -t plain"""
- return _DiskTest(node, AcquireInstance(), ['--disk-template=plain'])
+ """
+ RunTestIf("group-rwops", qa_group.TestGroupAddRemoveRename)
+ RunTestIf("group-rwops", qa_group.TestGroupAddWithOptions)
+ RunTestIf("group-rwops", qa_group.TestGroupModify)
+ RunTestIf(["group-rwops", "rapi"], qa_rapi.TestRapiNodeGroups)
+ RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags,
+ qa_group.GetDefaultGroup())
-def TestInstanceAddWithLocalMirrorDisk(node):
- """gnt-instance add -t local_raid1"""
- return _DiskTest(node, AcquireInstance(), ['--disk-template=local_raid1'])
+def RunExportImportTests(instance, pnode, snode):
+ """Tries to export and import the instance.
+ @param pnode: current primary node of the instance
+ @param snode: current secondary node of the instance, if any,
+ otherwise None
-def TestInstanceAddWithRemoteRaidDisk(node, node2):
- """gnt-instance add -t remote_raid1"""
- return _DiskTest(node, AcquireInstance(),
- ['--disk-template=remote_raid1',
- '--secondary-node=%s' % node2['primary']])
+ """
+ if qa_config.TestEnabled("instance-export"):
+ RunTest(qa_instance.TestInstanceExportNoTarget, instance)
+ expnode = qa_config.AcquireNode(exclude=pnode)
+ try:
+ name = RunTest(qa_instance.TestInstanceExport, instance, expnode)
+
+ RunTest(qa_instance.TestBackupList, expnode)
+
+ if qa_config.TestEnabled("instance-import"):
+ newinst = qa_config.AcquireInstance()
+ try:
+ RunTest(qa_instance.TestInstanceImport, newinst, pnode,
+ expnode, name)
+ # Check if starting the instance works
+ RunTest(qa_instance.TestInstanceStartup, newinst)
+ RunTest(qa_instance.TestInstanceRemove, newinst)
+ finally:
+ qa_config.ReleaseInstance(newinst)
+ finally:
+ qa_config.ReleaseNode(expnode)
-def TestInstanceRemove(instance):
- """gnt-instance remove"""
- cmd = ['gnt-instance', 'remove', '-f', instance['name']]
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ if qa_config.TestEnabled(["rapi", "inter-cluster-instance-move"]):
+ newinst = qa_config.AcquireInstance()
+ try:
+ if snode is None:
+ excl = [pnode]
+ else:
+ excl = [pnode, snode]
+ tnode = qa_config.AcquireNode(exclude=excl)
+ try:
+ RunTest(qa_rapi.TestInterClusterInstanceMove, instance, newinst,
+ pnode, snode, tnode)
+ finally:
+ qa_config.ReleaseNode(tnode)
+ finally:
+ qa_config.ReleaseInstance(newinst)
- ReleaseInstance(instance)
+def RunDaemonTests(instance):
+ """Test the ganeti-watcher script.
-def TestInstanceStartup(instance):
- """gnt-instance startup"""
- cmd = ['gnt-instance', 'startup', instance['name']]
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ """
+ RunTest(qa_daemon.TestPauseWatcher)
+ RunTestIf("instance-automatic-restart",
+ qa_daemon.TestInstanceAutomaticRestart, instance)
+ RunTestIf("instance-consecutive-failures",
+ qa_daemon.TestInstanceConsecutiveFailures, instance)
-def TestInstanceShutdown(instance):
- """gnt-instance shutdown"""
- cmd = ['gnt-instance', 'shutdown', instance['name']]
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ RunTest(qa_daemon.TestResumeWatcher)
-def TestInstanceFailover(instance):
- """gnt-instance failover"""
- cmd = ['gnt-instance', 'failover', '--force', instance['name']]
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+def RunSingleHomedHardwareFailureTests(instance, pnode):
+ """Test hardware failure recovery for single-homed instances.
+ """
+ if qa_config.TestEnabled("instance-recreate-disks"):
+ othernode = qa_config.AcquireNode(exclude=[pnode])
+ try:
+ RunTest(qa_instance.TestRecreateDisks,
+ instance, pnode, None, [othernode])
+ finally:
+ qa_config.ReleaseNode(othernode)
-def TestInstanceInfo(instance):
- """gnt-instance info"""
- cmd = ['gnt-instance', 'info', instance['name']]
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
-# }}}
-# {{{ Daemon tests
-def _ResolveInstanceName(instance):
- """Gets the full Xen name of an instance.
+def RunHardwareFailureTests(instance, pnode, snode):
+ """Test cluster internal hardware failure recovery.
"""
- master = GetMasterNode()
+ RunTestIf("instance-failover", qa_instance.TestInstanceFailover, instance)
+ RunTestIf(["instance-failover", "rapi"],
+ qa_rapi.TestRapiInstanceFailover, instance)
- info_cmd = utils.ShellQuoteArgs(['gnt-instance', 'info', instance['name']])
- sed_cmd = utils.ShellQuoteArgs(['sed', '-n', '-e', 's/^Instance name: *//p'])
+ RunTestIf("instance-migrate", qa_instance.TestInstanceMigrate, instance)
+ RunTestIf(["instance-migrate", "rapi"],
+ qa_rapi.TestRapiInstanceMigrate, instance)
- cmd = '%s | %s' % (info_cmd, sed_cmd)
- p = subprocess.Popen(GetSSHCommand(master['primary'], cmd), shell=False,
- stdout=subprocess.PIPE)
- AssertEqual(p.wait(), 0)
+ if qa_config.TestEnabled("instance-replace-disks"):
+ othernode = qa_config.AcquireNode(exclude=[pnode, snode])
+ try:
+ RunTestIf("rapi", qa_rapi.TestRapiInstanceReplaceDisks, instance)
+ RunTest(qa_instance.TestReplaceDisks,
+ instance, pnode, snode, othernode)
+ finally:
+ qa_config.ReleaseNode(othernode)
- return p.stdout.read().strip()
+ if qa_config.TestEnabled("instance-recreate-disks"):
+ othernode1 = qa_config.AcquireNode(exclude=[pnode, snode])
+ try:
+ othernode2 = qa_config.AcquireNode(exclude=[pnode, snode, othernode1])
+ except qa_error.OutOfNodesError:
+ # Let's reuse one of the nodes if the cluster is not big enough
+ othernode2 = pnode
+ try:
+ RunTest(qa_instance.TestRecreateDisks,
+ instance, pnode, snode, [othernode1, othernode2])
+ finally:
+ qa_config.ReleaseNode(othernode1)
+ if othernode2 != pnode:
+ qa_config.ReleaseNode(othernode2)
+ RunTestIf("node-evacuate", qa_node.TestNodeEvacuate, pnode, snode)
-def _InstanceRunning(node, name):
- """Checks whether an instance is running.
+ RunTestIf("node-failover", qa_node.TestNodeFailover, pnode, snode)
- Args:
- node: Node the instance runs on
- name: Full name of Xen instance
- """
- cmd = utils.ShellQuoteArgs(['xm', 'list', name]) + ' >/dev/null'
- ret = StartSSH(node['primary'], cmd).wait()
- return ret == 0
+ RunTestIf("instance-disk-failure", qa_instance.TestInstanceMasterDiskFailure,
+ instance, pnode, snode)
+ RunTestIf("instance-disk-failure",
+ qa_instance.TestInstanceSecondaryDiskFailure, instance,
+ pnode, snode)
-def _XmShutdownInstance(node, name):
- """Shuts down instance using "xm" and waits for completion.
+def RunExclusiveStorageTests():
+ """Test exclusive storage."""
+ if not qa_config.TestEnabled("cluster-exclusive-storage"):
+ return
- Args:
- node: Node the instance runs on
- name: Full name of Xen instance
- """
- cmd = ['xm', 'shutdown', name]
- AssertEqual(StartSSH(GetMasterNode()['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
-
- # Wait up to a minute
- end = time.time() + 60
- while time.time() <= end:
- if not _InstanceRunning(node, name):
- break
- time.sleep(5)
- else:
- raise Error("xm shutdown failed")
+ node = qa_config.AcquireNode()
+ try:
+ old_es = qa_cluster.TestSetExclStorCluster(False)
+ qa_cluster.TestExclStorSingleNode(node)
+
+ qa_cluster.TestSetExclStorCluster(True)
+ qa_cluster.TestExclStorSharedPv(node)
+
+ if qa_config.TestEnabled("instance-add-plain-disk"):
+ # Make sure that the cluster doesn't have any pre-existing problem
+ qa_cluster.AssertClusterVerify()
+ instance1 = qa_instance.TestInstanceAddWithPlainDisk(node)
+ instance2 = qa_instance.TestInstanceAddWithPlainDisk(node)
+ # cluster-verify checks that disks are allocated correctly
+ qa_cluster.AssertClusterVerify()
+ qa_instance.TestInstanceRemove(instance1)
+ qa_instance.TestInstanceRemove(instance2)
+ if qa_config.TestEnabled("instance-add-drbd-disk"):
+ snode = qa_config.AcquireNode()
+ try:
+ qa_cluster.TestSetExclStorCluster(False)
+ instance = qa_instance.TestInstanceAddWithDrbdDisk(node, snode)
+ qa_cluster.TestSetExclStorCluster(True)
+ exp_err = [constants.CV_EINSTANCEUNSUITABLENODE]
+ qa_cluster.AssertClusterVerify(fail=True, errors=exp_err)
+ qa_instance.TestInstanceRemove(instance)
+ finally:
+ qa_config.ReleaseNode(snode)
+ qa_cluster.TestSetExclStorCluster(old_es)
+ finally:
+ qa_config.ReleaseNode(node)
-def _ResetWatcherDaemon(node):
- """Removes the watcher daemon's state file.
+def RunQa():
+ """Main QA body.
- Args:
- node: Node to be reset
"""
- cmd = ['rm', '-f', constants.WATCHER_STATEFILE]
- AssertEqual(StartSSH(node['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ rapi_user = "ganeti-qa"
+ rapi_secret = utils.GenerateSecret()
+ RunEnvTests()
+ SetupCluster(rapi_user, rapi_secret)
-def TestInstanceAutomaticRestart(node, instance):
- """Test automatic restart of instance by ganeti-watcher.
+ # Load RAPI certificate
+ qa_rapi.Setup(rapi_user, rapi_secret)
- Note: takes up to 6 minutes to complete.
- """
- master = GetMasterNode()
- inst_name = _ResolveInstanceName(instance)
+ RunClusterTests()
+ RunOsTests()
- _ResetWatcherDaemon(node)
- _XmShutdownInstance(node, inst_name)
+ RunTestIf("tags", qa_tags.TestClusterTags)
- # Give it a bit more than five minutes to start again
- restart_at = time.time() + 330
+ RunCommonNodeTests()
+ RunGroupListTests()
+ RunGroupRwTests()
- # Wait until it's running again
- while time.time() <= restart_at:
- if _InstanceRunning(node, inst_name):
- break
- time.sleep(15)
- else:
- raise Error("Daemon didn't restart instance in time")
+ # The master shouldn't be readded or put offline; "delay" needs a non-master
+ # node to test
+ pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
+ try:
+ RunTestIf("node-readd", qa_node.TestNodeReadd, pnode)
+ RunTestIf("node-modify", qa_node.TestNodeModify, pnode)
+ RunTestIf("delay", qa_cluster.TestDelay, pnode)
+ finally:
+ qa_config.ReleaseNode(pnode)
- cmd = ['gnt-instance', 'info', inst_name]
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
+ pnode = qa_config.AcquireNode()
+ try:
+ RunTestIf("tags", qa_tags.TestNodeTags, pnode)
+
+ if qa_rapi.Enabled():
+ RunTest(qa_rapi.TestNode, pnode)
+
+ if qa_config.TestEnabled("instance-add-plain-disk"):
+ for use_client in [True, False]:
+ rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
+ use_client)
+ if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
+ RunCommonInstanceTests(rapi_instance)
+ RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
+ del rapi_instance
+
+ if qa_config.TestEnabled("instance-add-plain-disk"):
+ instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode)
+ RunCommonInstanceTests(instance)
+ RunGroupListTests()
+ RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
+ RunExportImportTests(instance, pnode, None)
+ RunDaemonTests(instance)
+ RunRepairDiskSizes()
+ RunSingleHomedHardwareFailureTests(instance, pnode)
+ RunTest(qa_instance.TestInstanceRemove, instance)
+ del instance
+ multinode_tests = [
+ ("instance-add-drbd-disk",
+ qa_instance.TestInstanceAddWithDrbdDisk),
+ ]
+
+ for name, func in multinode_tests:
+ if qa_config.TestEnabled(name):
+ snode = qa_config.AcquireNode(exclude=pnode)
+ try:
+ instance = RunTest(func, pnode, snode)
+ RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, pnode)
+ RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, snode)
+ RunCommonInstanceTests(instance)
+ RunGroupListTests()
+ RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
+ constants.INITIAL_NODE_GROUP_NAME,
+ pnode["primary"], snode["primary"])
+ if qa_config.TestEnabled("instance-convert-disk"):
+ RunTest(qa_instance.TestInstanceShutdown, instance)
+ RunTest(qa_instance.TestInstanceConvertDisk, instance, snode)
+ RunTest(qa_instance.TestInstanceStartup, instance)
+ RunExportImportTests(instance, pnode, snode)
+ RunHardwareFailureTests(instance, pnode, snode)
+ RunRepairDiskSizes()
+ RunTest(qa_instance.TestInstanceRemove, instance)
+ del instance
+ finally:
+ qa_config.ReleaseNode(snode)
-def TestInstanceConsecutiveFailures(node, instance):
- """Test five consecutive instance failures.
+ finally:
+ qa_config.ReleaseNode(pnode)
- Note: takes at least 35 minutes to complete.
- """
- master = GetMasterNode()
- inst_name = _ResolveInstanceName(instance)
+ # Test removing instance with offline drbd secondary
+ if qa_config.TestEnabled("instance-remove-drbd-offline"):
+ # Make sure the master is not put offline
+ snode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
+ try:
+ pnode = qa_config.AcquireNode(exclude=snode)
+ try:
+ instance = qa_instance.TestInstanceAddWithDrbdDisk(pnode, snode)
+ qa_node.MakeNodeOffline(snode, "yes")
+ try:
+ RunTest(qa_instance.TestInstanceRemove, instance)
+ finally:
+ qa_node.MakeNodeOffline(snode, "no")
+ finally:
+ qa_config.ReleaseNode(pnode)
+ finally:
+ qa_config.ReleaseNode(snode)
- _ResetWatcherDaemon(node)
- _XmShutdownInstance(node, inst_name)
+ pnode = qa_config.AcquireNode()
+ try:
+ if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
+ for shutdown in [False, True]:
+ instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode)
+ expnode = qa_config.AcquireNode(exclude=pnode)
+ try:
+ if shutdown:
+ # Stop instance before exporting and removing it
+ RunTest(qa_instance.TestInstanceShutdown, instance)
+ RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
+ RunTest(qa_instance.TestBackupList, expnode)
+ finally:
+ qa_config.ReleaseNode(expnode)
+ del expnode
+ del instance
- # Do shutdowns for 30 minutes
- finished_at = time.time() + (35 * 60)
+ finally:
+ qa_config.ReleaseNode(pnode)
- while time.time() <= finished_at:
- if _InstanceRunning(node, inst_name):
- _XmShutdownInstance(node, inst_name)
- time.sleep(30)
+ RunExclusiveStorageTests()
- # Check for some time whether the instance doesn't start again
- check_until = time.time() + 330
- while time.time() <= check_until:
- if _InstanceRunning(node, inst_name):
- raise Error("Instance started when it shouldn't")
- time.sleep(30)
+ RunTestIf("create-cluster", qa_node.TestNodeRemoveAll)
- cmd = ['gnt-instance', 'info', inst_name]
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
-# }}}
+ RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
-# {{{ Other tests
-def TestUploadKnownHostsFile(localpath):
- """Uploading known_hosts file.
- """
- master = GetMasterNode()
+@UsesRapiClient
+def main():
+ """Main program.
- tmpfile = UploadFile(master['primary'], localpath)
- try:
- cmd = ['mv', tmpfile, constants.SSH_KNOWN_HOSTS_FILE]
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
- except:
- cmd = ['rm', '-f', tmpfile]
- AssertEqual(StartSSH(master['primary'],
- utils.ShellQuoteArgs(cmd)).wait(), 0)
- raise
-# }}}
-
-# {{{ Main program
-if __name__ == '__main__':
- # {{{ Option parsing
- parser = OptionParser(usage="%prog [options] <config-file> "
- "<known-hosts-file>")
- parser.add_option('--dry-run', dest='dry_run',
- action="store_true",
- help="Show what would be done")
- parser.add_option('--yes-do-it', dest='yes_do_it',
- action="store_true",
- help="Really execute the tests")
- (options, args) = parser.parse_args()
- # }}}
-
- if len(args) == 2:
- (config_file, known_hosts_file) = args
+ """
+ parser = optparse.OptionParser(usage="%prog [options] <config-file>")
+ parser.add_option("--yes-do-it", dest="yes_do_it",
+ action="store_true",
+ help="Really execute the tests")
+ (qa_config.options, args) = parser.parse_args()
+
+ if len(args) == 1:
+ (config_file, ) = args
else:
- parser.error("Not enough arguments.")
+ parser.error("Wrong number of arguments.")
- if not options.yes_do_it:
+ if not qa_config.options.yes_do_it:
print ("Executing this script irreversibly destroys any Ganeti\n"
"configuration on all nodes involved. If you really want\n"
"to start testing, supply the --yes-do-it option.")
sys.exit(1)
- f = open(config_file, 'r')
- try:
- cfg = yaml.load(f.read())
- finally:
- f.close()
-
- RunTest(TestConfig)
-
- RunTest(TestUploadKnownHostsFile, known_hosts_file)
-
- if TestEnabled('env'):
- RunTest(TestSshConnection)
- RunTest(TestIcmpPing)
- RunTest(TestGanetiCommands)
-
- RunTest(TestClusterInit)
-
- RunTest(TestNodeAddAll)
-
- if TestEnabled('cluster-verify'):
- RunTest(TestClusterVerify)
+ qa_config.Load(config_file)
- if TestEnabled('cluster-info'):
- RunTest(TestClusterInfo)
-
- if TestEnabled('cluster-copyfile'):
- RunTest(TestClusterCopyfile)
-
- if TestEnabled('node-info'):
- RunTest(TestNodeInfo)
-
- if TestEnabled('cluster-burnin'):
- RunTest(TestClusterBurnin)
-
- if TestEnabled('cluster-master-failover'):
- RunTest(TestClusterMasterFailover)
-
- node = AcquireNode()
+ primary = qa_config.GetMasterNode()["primary"]
+ qa_utils.StartMultiplexer(primary)
+ print ("SSH command for primary node: %s" %
+ utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, "")))
+ print ("SSH command for other nodes: %s" %
+ utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", "")))
try:
- if TestEnabled('instance-add-plain-disk'):
- instance = RunTest(TestInstanceAddWithPlainDisk, node)
- RunTest(TestInstanceShutdown, instance)
- RunTest(TestInstanceStartup, instance)
-
- if TestEnabled('instance-info'):
- RunTest(TestInstanceInfo, instance)
-
- if TestEnabled('instance-automatic-restart'):
- RunTest(TestInstanceAutomaticRestart, node, instance)
-
- if TestEnabled('instance-consecutive-failures'):
- RunTest(TestInstanceConsecutiveFailures, node, instance)
-
- if TestEnabled('node-volumes'):
- RunTest(TestNodeVolumes)
-
- RunTest(TestInstanceRemove, instance)
- del instance
-
- if TestEnabled('instance-add-local-mirror-disk'):
- instance = RunTest(TestInstanceAddWithLocalMirrorDisk, node)
- RunTest(TestInstanceShutdown, instance)
- RunTest(TestInstanceStartup, instance)
-
- if TestEnabled('instance-info'):
- RunTest(TestInstanceInfo, instance)
-
- if TestEnabled('node-volumes'):
- RunTest(TestNodeVolumes)
-
- RunTest(TestInstanceRemove, instance)
- del instance
-
- if TestEnabled('instance-add-remote-raid-disk'):
- node2 = AcquireNode(exclude=node)
- try:
- instance = RunTest(TestInstanceAddWithRemoteRaidDisk, node, node2)
- RunTest(TestInstanceShutdown, instance)
- RunTest(TestInstanceStartup, instance)
-
- if TestEnabled('instance-info'):
- RunTest(TestInstanceInfo, instance)
-
- if TestEnabled('instance-failover'):
- RunTest(TestInstanceFailover, instance)
-
- if TestEnabled('node-volumes'):
- RunTest(TestNodeVolumes)
-
- RunTest(TestInstanceRemove, instance)
- del instance
- finally:
- ReleaseNode(node2)
-
+ RunQa()
finally:
- ReleaseNode(node)
-
- RunTest(TestNodeRemoveAll)
-
- if TestEnabled('cluster-destroy'):
- RunTest(TestClusterDestroy)
-# }}}
+ qa_utils.CloseMultiplexers()
-# vim: foldmethod=marker :
+if __name__ == "__main__":
+ main()