import sys
import optparse
import time
+import socket
+import urllib2
+import errno
from itertools import izip, islice, cycle
from cStringIO import StringIO
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
class InstanceDown(Exception):
    """Signals that a liveness check on an instance has failed.

    Within this file it is raised with two arguments: the name of the
    instance and a string describing why it is considered down.

    """
+
+
def Usage():
"""Shows program usage information and exits the program."""
sys.exit(2)
def Log(msg, indent=0):
    """Write a message to stdout, prefixed according to its nesting level.

    The line starts with two spaces per indent level, followed by a marker:
    "- " for level 0, "* " for level 1, nothing for level 2 and a single
    space for any other level. Stdout is flushed after every message.

    The message is rendered with %s, so it does not have to be a string.

    """
    marker = {0: "- ", 1: "* ", 2: ""}.get(indent, " ")
    # "%*s" pads the empty string to the requested width
    padding = "%*s" % (2 * indent, "")
    sys.stdout.write("%s%s%s\n" % (padding, marker, msg))
    sys.stdout.flush()
def Err(msg, exit_code=1):
    """Report a fatal error on stderr and terminate the program.

    The message is written followed by a newline, stderr is flushed, and
    the process exits through sys.exit with the given exit code.

    """
    stream = sys.stderr
    stream.write(msg + "\n")
    stream.flush()
    sys.exit(exit_code)
class Burner(object):
"""Burner class."""
self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])),
msg[2]))
if self.opts.verbose:
- Log(msg)
+ Log(msg, indent=3)
def ExecOp(self, op):
"""Execute an opcode and manage the exec buffer."""
"""
self.ClearFeedbackBuf()
job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
- Log("- Submitted job IDs %s" % ", ".join(job_ids))
+ Log("Submitted job IDs %s" % ", ".join(job_ids), indent=1)
results = []
for jid in job_ids:
- Log("- Waiting for job %s" % jid)
+ Log("Waiting for job %s" % jid, indent=2)
results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
return results
parser.add_option("--no-startstop", dest="do_startstop",
help="Skip instance stop/start", action="store_false",
default=True)
+ parser.add_option("--no-reinstall", dest="do_reinstall",
+ help="Skip instance reinstall", action="store_false",
+ default=True)
+ parser.add_option("--no-reboot", dest="do_reboot",
+ help="Skip instance reboot", action="store_false",
+ default=True)
+ parser.add_option("--no-activate-disks", dest="do_activate_disks",
+ help="Skip disk activation/deactivation",
+ action="store_false", default=True)
+ parser.add_option("--no-add-disks", dest="do_addremove_disks",
+ help="Skip disk addition/removal",
+ action="store_false", default=True)
+ parser.add_option("--no-add-nics", dest="do_addremove_nics",
+ help="Skip NIC addition/removal",
+ action="store_false", default=True)
+ parser.add_option("--no-nics", dest="nics",
+ help="No network interfaces", action="store_const",
+ const=[], default=[{}])
parser.add_option("--rename", dest="rename", default=None,
help="Give one unused instance name which is taken"
" to start the renaming sequence",
dest="parallel",
help="Enable parallelization of some operations in"
" order to speed burnin or to test granular locking")
+ parser.add_option("--net-timeout", default=15, type="int",
+ dest="net_timeout",
+ help="The instance check network timeout in seconds"
+ " (defaults to 15 seconds)")
+ parser.add_option("-C", "--http-check", default=False, action="store_true",
+ dest="http_check",
+ help="Enable checking of instance status via http,"
+ " looking for /hostname.txt that should contain the"
+ " name of the instance")
+
options, args = parser.parse_args()
if len(args) < 1 or options.os is None:
constants.DT_PLAIN,
constants.DT_DRBD8)
if options.disk_template not in supported_disk_templates:
- Log("Unknown disk template '%s'" % options.disk_template)
- sys.exit(1)
-
- disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
- disk_growth = [utils.ParseUnit(v) for v in options.disk_growth.split(",")]
- if len(disk_growth) != len(disk_size):
- Log("Wrong disk sizes/growth combination")
- sys.exit(1)
+ Err("Unknown disk template '%s'" % options.disk_template)
+
+ if options.disk_template == constants.DT_DISKLESS:
+ disk_size = disk_growth = []
+ options.do_addremove_disks = False
+ else:
+ disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
+ disk_growth = [utils.ParseUnit(v)
+ for v in options.disk_growth.split(",")]
+ if len(disk_growth) != len(disk_size):
+ Err("Wrong disk sizes/growth combination")
if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
(not disk_size and options.disk_template != constants.DT_DISKLESS)):
- Log("Wrong disk count/disk template combination")
- sys.exit(1)
+ Err("Wrong disk count/disk template combination")
self.disk_size = disk_size
self.disk_growth = disk_growth
self.disk_count = len(disk_size)
if options.nodes and options.iallocator:
- Log("Give either the nodes option or the iallocator option, not both")
- sys.exit(1)
+ Err("Give either the nodes option or the iallocator option, not both")
self.opts = options
self.instances = args
}
self.hvp = {}
+ socket.setdefaulttimeout(options.net_timeout)
+
def GetState(self):
"""Read the cluster state from the config."""
if self.opts.nodes:
else:
names = []
try:
- op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
+ op = opcodes.OpQueryNodes(output_fields=["name", "offline"], names=names)
result = self.ExecOp(op)
except errors.GenericError, err:
err_code, msg = cli.FormatError(err)
- Log(msg)
- sys.exit(err_code)
- self.nodes = [data[0] for data in result]
+ Err(msg, exit_code=err_code)
+ self.nodes = [data[0] for data in result if not data[1]]
result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
names=[]))
if not result:
- Log("Can't get the OS list")
- sys.exit(1)
+ Err("Can't get the OS list")
# filter non-valid OS-es
os_set = [val[0] for val in result if val[1]]
if self.opts.os not in os_set:
- Log("OS '%s' not found" % self.opts.os)
- sys.exit(1)
+ Err("OS '%s' not found" % self.opts.os)
def CreateInstances(self):
"""Create the given instances.
self.instances)
jobset = []
+ Log("Creating instances")
for pnode, snode, instance in mytor:
+ Log("instance %s" % instance, indent=1)
if self.opts.iallocator:
pnode = snode = None
- Log("- Add instance %s (iallocator: %s)" %
- (instance, self.opts.iallocator))
+ msg = "with iallocator %s" % self.opts.iallocator
elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
snode = None
- Log("- Add instance %s on node %s" % (instance, pnode))
+ msg = "on %s" % pnode
else:
- Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))
+ msg = "on %s, %s" % (pnode, snode)
+
+ Log(msg, indent=2)
op = opcodes.OpCreateInstance(instance_name=instance,
disks = [ {"size": size}
for size in self.disk_size],
disk_template=self.opts.disk_template,
- nics=[{}],
+ nics=self.opts.nics,
mode=constants.INSTANCE_CREATE,
os_type=self.opts.os,
pnode=pnode,
if self.opts.parallel:
self.ExecJobSet(jobset)
+ for instance in self.instances:
+ self._CheckInstanceAlive(instance)
+
def GrowDisks(self):
    """Grow the disks of every instance by the configured amounts.

    Each entry of self.disk_growth applies to the disk with the same index;
    entries that are not greater than zero are skipped.

    """
    Log("Growing disks")
    for name in self.instances:
        Log("instance %s" % name, indent=1)
        for disk_idx, amount in enumerate(self.disk_growth):
            if not amount > 0:
                continue
            grow_op = opcodes.OpGrowDisk(instance_name=name, disk=disk_idx,
                                         amount=amount, wait_for_sync=True)
            Log("increase disk/%s by %s MB" % (disk_idx, amount), indent=2)
            self.ExecOp(grow_op)
def ReplaceDisks1D8(self):
    """Replace the disks of each instance without changing its nodes.

    For every instance the replacement is run twice, first in
    REPLACE_DISK_SEC mode and then in REPLACE_DISK_PRI mode, each time
    covering all disk indices from 0 to self.disk_count - 1.

    """
    Log("Replacing disks on the same nodes")
    for name in self.instances:
        Log("instance %s" % name, indent=1)
        for mode in (constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI):
            all_disks = list(range(self.disk_count))
            replace_op = opcodes.OpReplaceDisks(instance_name=name,
                                                mode=mode,
                                                disks=all_disks)
            Log("run %s" % mode, indent=2)
            self.ExecOp(replace_op)
def ReplaceDisks2(self):
    """Change the secondary node of each instance.

    Target nodes are taken round-robin from self.nodes, starting at the
    third entry. When an iallocator is configured no target node is passed
    and the choice is left to the allocator.

    """
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG
    candidates = islice(cycle(self.nodes), 2, None)
    for new_secondary, name in izip(candidates, self.instances):
        Log("instance %s" % name, indent=1)
        if self.opts.iallocator:
            new_secondary = None
            where = "with iallocator %s" % self.opts.iallocator
        else:
            where = new_secondary
        all_disks = list(range(self.disk_count))
        replace_op = opcodes.OpReplaceDisks(instance_name=name,
                                            mode=mode,
                                            remote_node=new_secondary,
                                            iallocator=self.opts.iallocator,
                                            disks=all_disks)
        Log("run %s %s" % (mode, where), indent=2)
        self.ExecOp(replace_op)
def Failover(self):
    """Fail over every instance, then check that each one is alive.

    The failover is requested with ignore_consistency=False.

    """
    Log("Failing over instances")
    for name in self.instances:
        Log("instance %s" % name, indent=1)
        failover_op = opcodes.OpFailoverInstance(instance_name=name,
                                                 ignore_consistency=False)
        self.ExecOp(failover_op)
    for name in self.instances:
        self._CheckInstanceAlive(name)
def ImportExport(self):
"""Export the instance, delete it, and import it back.
"""
-
+ Log("Exporting and re-importing instances")
mytor = izip(cycle(self.nodes),
islice(cycle(self.nodes), 1, None),
islice(cycle(self.nodes), 2, None),
self.instances)
for pnode, snode, enode, instance in mytor:
-
+ Log("instance %s" % instance, indent=1)
if self.opts.iallocator:
pnode = snode = None
- import_log_msg = ("- Import instance %s from node %s (iallocator: %s)" %
- (instance, enode, self.opts.iallocator))
+ import_log_msg = ("import from %s"
+ " with iallocator %s" %
+ (enode, self.opts.iallocator))
elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
snode = None
- import_log_msg = ("- Import instance %s from node %s to node %s" %
- (instance, enode, pnode))
+ import_log_msg = ("import from %s to %s" %
+ (enode, pnode))
else:
- import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
- (instance, enode, pnode, snode))
+ import_log_msg = ("import from %s to %s, %s" %
+ (enode, pnode, snode))
exp_op = opcodes.OpExportInstance(instance_name=instance,
target_node=enode,
disks = [ {"size": size}
for size in self.disk_size],
disk_template=self.opts.disk_template,
- nics=[{}],
+ nics=self.opts.nics,
mode=constants.INSTANCE_IMPORT,
src_node=enode,
src_path=imp_dir,
erem_op = opcodes.OpRemoveExport(instance_name=instance)
- Log("- Export instance %s to node %s" % (instance, enode))
+ Log("export to node %s" % enode, indent=2)
self.ExecOp(exp_op)
- Log("- Remove instance %s" % (instance))
+ Log("remove instance", indent=2)
self.ExecOp(rem_op)
self.to_rem.remove(instance)
- Log(import_log_msg)
+ Log(import_log_msg, indent=2)
self.ExecOp(imp_op)
- Log("- Remove export of instance %s" % (instance))
+ Log("remove export", indent=2)
self.ExecOp(erem_op)
self.to_rem.append(instance)
+ for instance in self.instances:
+ self._CheckInstanceAlive(instance)
+
def StopInstance(self, instance):
    """Submit a shutdown operation for the given instance and wait for it."""
    shutdown_op = opcodes.OpShutdownInstance(instance_name=instance)
    Log("shutdown", indent=2)
    self.ExecOp(shutdown_op)
def StartInstance(self, instance):
    """Submit a non-forced startup operation for the given instance."""
    startup_op = opcodes.OpStartupInstance(instance_name=instance,
                                           force=False)
    Log("startup", indent=2)
    self.ExecOp(startup_op)
def RenameInstance(self, instance, instance_new):
    """Rename an instance from its current name to instance_new."""
    rename_op = opcodes.OpRenameInstance(instance_name=instance,
                                         new_name=instance_new)
    Log("rename to %s" % instance_new, indent=2)
    self.ExecOp(rename_op)
def StopStart(self):
    """Stop and restart every instance, then check that each one is alive."""
    Log("Stopping and starting instances")
    for name in self.instances:
        Log("instance %s" % name, indent=1)
        self.StopInstance(name)
        self.StartInstance(name)
    for name in self.instances:
        self._CheckInstanceAlive(name)
+
def Remove(self):
    """Remove every instance listed in self.to_rem.

    The removal is requested with ignore_failures=True.

    """
    Log("Removing instances")
    for name in self.to_rem:
        Log("instance %s" % name, indent=1)
        remove_op = opcodes.OpRemoveInstance(instance_name=name,
                                             ignore_failures=True)
        self.ExecOp(remove_op)
-
def Rename(self):
    """Rename each instance to the spare name and back again.

    The spare name comes from the rename option. An instance is stopped
    before each rename and started afterwards; it is checked for liveness
    under the spare name, and all instances are checked again at the end
    under their original names.

    """
    Log("Renaming instances")
    spare_name = self.opts.rename
    for original_name in self.instances:
        Log("instance %s" % original_name, indent=1)
        # original name -> spare name
        self.StopInstance(original_name)
        self.RenameInstance(original_name, spare_name)
        self.StartInstance(spare_name)
        self._CheckInstanceAlive(spare_name)
        # spare name -> original name
        self.StopInstance(spare_name)
        self.RenameInstance(spare_name, original_name)
        self.StartInstance(original_name)
    for original_name in self.instances:
        self._CheckInstanceAlive(original_name)
+
def Reinstall(self):
    """Reinstall every instance twice while it is stopped.

    The first reinstall does not pass an OS; the second one passes the OS
    given in the options. The instance is started again afterwards, and
    all instances are checked for liveness at the end.

    """
    Log("Reinstalling instances")
    for name in self.instances:
        Log("instance %s" % name, indent=1)
        self.StopInstance(name)
        implicit_os_op = opcodes.OpReinstallInstance(instance_name=name)
        Log("reinstall without passing the OS", indent=2)
        self.ExecOp(implicit_os_op)
        explicit_os_op = opcodes.OpReinstallInstance(instance_name=name,
                                                     os_type=self.opts.os)
        Log("reinstall specifying the OS", indent=2)
        self.ExecOp(explicit_os_op)
        self.StartInstance(name)
    for name in self.instances:
        self._CheckInstanceAlive(name)
+
def Reboot(self):
    """Reboot every instance once per type in constants.REBOOT_TYPES.

    Reboots are requested with ignore_secondaries=False, and the instance
    is checked for liveness after each reboot.

    """
    Log("Rebooting instances")
    for name in self.instances:
        Log("instance %s" % name, indent=1)
        for kind in constants.REBOOT_TYPES:
            reboot_op = opcodes.OpRebootInstance(instance_name=name,
                                                 reboot_type=kind,
                                                 ignore_secondaries=False)
            Log("reboot with type '%s'" % kind, indent=2)
            self.ExecOp(reboot_op)
            self._CheckInstanceAlive(name)
+
def ActivateDisks(self):
    """Activate and deactivate the disks of every instance.

    Disks are activated once before the instance is stopped and once
    after, then deactivated, and finally the instance is started again.
    All instances are checked for liveness at the end.

    """
    Log("Activating/deactivating disks")
    for name in self.instances:
        Log("instance %s" % name, indent=1)
        activate_op = opcodes.OpActivateInstanceDisks(instance_name=name)
        deactivate_op = opcodes.OpDeactivateInstanceDisks(instance_name=name)
        Log("activate disks when online", indent=2)
        self.ExecOp(activate_op)
        self.StopInstance(name)
        Log("activate disks when offline", indent=2)
        self.ExecOp(activate_op)
        Log("deactivate disks (when offline)", indent=2)
        self.ExecOp(deactivate_op)
        self.StartInstance(name)
    for name in self.instances:
        self._CheckInstanceAlive(name)
+
def AddRemoveDisks(self):
    """Add an extra disk to every instance and remove a disk again.

    The new disk has the size of the first configured disk
    (self.disk_size[0]) and is added before the instance is stopped. The
    removal is submitted while the instance is stopped, after which the
    instance is started again. All instances are checked for liveness at
    the end.

    """
    Log("Adding and removing disks")
    for name in self.instances:
        Log("instance %s" % name, indent=1)
        new_disk = {"size": self.disk_size[0]}
        add_op = opcodes.OpSetInstanceParams(
            instance_name=name,
            disks=[(constants.DDM_ADD, new_disk)])
        remove_op = opcodes.OpSetInstanceParams(
            instance_name=name,
            disks=[(constants.DDM_REMOVE, {})])
        Log("adding a disk", indent=2)
        self.ExecOp(add_op)
        self.StopInstance(name)
        Log("removing last disk", indent=2)
        self.ExecOp(remove_op)
        self.StartInstance(name)
    for name in self.instances:
        self._CheckInstanceAlive(name)
+
def AddRemoveNICs(self):
    """Add an extra NIC to every instance and remove a NIC again.

    Both operations are submitted with empty NIC parameters and without
    stopping the instance.

    """
    Log("Adding and removing NICs")
    for name in self.instances:
        Log("instance %s" % name, indent=1)
        add_op = opcodes.OpSetInstanceParams(
            instance_name=name,
            nics=[(constants.DDM_ADD, {})])
        remove_op = opcodes.OpSetInstanceParams(
            instance_name=name,
            nics=[(constants.DDM_REMOVE, {})])
        Log("adding a NIC", indent=2)
        self.ExecOp(add_op)
        Log("removing last NIC", indent=2)
        self.ExecOp(remove_op)
+
+ def _CheckInstanceAlive(self, instance):
+ """Check if an instance is alive by doing http checks.
+
+ This will try to retrieve the url on the instance /hostname.txt
+ and check that it contains the hostname of the instance. In case
+ we get ECONNREFUSED, we retry up to the net timeout seconds, for
+ any other error we abort.
+
+ """
+ if not self.opts.http_check:
+ return
+ try:
+ for retries in range(self.opts.net_timeout):
+ try:
+ url = urllib2.urlopen("http://%s/hostname.txt" % instance)
+ except urllib2.URLError, err:
+ if err.args[0][0] == errno.ECONNREFUSED:
+ time.sleep(1)
+ continue
+ raise
+ except urllib2.URLError, err:
+ raise InstanceDown(instance, str(err))
+ hostname = url.read().strip()
+ if hostname != instance:
+ raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
+ (instance, hostname)))
+
def BurninCluster(self):
"""Test a cluster intensively.
opts = self.opts
- Log("- Testing global parameters")
+ Log("Testing global parameters")
if (len(self.nodes) == 1 and
opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
constants.DT_FILE)):
- Log("When one node is available/selected the disk template must"
+ Err("When one node is available/selected the disk template must"
" be 'diskless', 'file' or 'plain'")
- sys.exit(1)
has_err = True
try:
if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
self.Failover()
- if opts.do_importexport:
+ if (opts.do_importexport and
+ opts.disk_template not in (constants.DT_DISKLESS,
+ constants.DT_FILE)):
self.ImportExport()
- if opts.do_startstop:
- self.StopStart()
+ if opts.do_reinstall:
+ self.Reinstall()
+
+ if opts.do_reboot:
+ self.Reboot()
+
+ if opts.do_addremove_disks:
+ self.AddRemoveDisks()
+
+ if opts.do_addremove_nics:
+ self.AddRemoveNICs()
+
+ if opts.do_activate_disks:
+ self.ActivateDisks()
if opts.rename:
self.Rename()
+ if opts.do_startstop:
+ self.StopStart()
+
has_err = False
finally:
if has_err: