X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/08db7c5cd2748c55676018f2aad9499b0f5530dc..f96e3c4f756dfbe4cd63f77d5a5d06a7ae09be2b:/tools/burnin diff --git a/tools/burnin b/tools/burnin index d328d47..c316ca0 100755 --- a/tools/burnin +++ b/tools/burnin @@ -27,6 +27,9 @@ import os import sys import optparse import time +import socket +import urllib2 +import errno from itertools import izip, islice, cycle from cStringIO import StringIO @@ -41,6 +44,10 @@ from ganeti import utils USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...") +class InstanceDown(Exception): + """The checked instance was not up""" + + def Usage(): """Shows program usage information and exits the program.""" @@ -49,13 +56,26 @@ def Usage(): sys.exit(2) -def Log(msg): +def Log(msg, indent=0): """Simple function that prints out its argument. """ - print msg + headers = { + 0: "- ", + 1: "* ", + 2: "" + } + sys.stdout.write("%*s%s%s\n" % (2*indent, "", + headers.get(indent, " "), msg)) sys.stdout.flush() +def Err(msg, exit_code=1): + """Simple error logging that prints to stderr. + + """ + sys.stderr.write(msg + "\n") + sys.stderr.flush() + sys.exit(exit_code) class Burner(object): """Burner class.""" @@ -85,7 +105,7 @@ class Burner(object): self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])), msg[2])) if self.opts.verbose: - Log(msg) + Log(msg, indent=3) def ExecOp(self, op): """Execute an opcode and manage the exec buffer.""" @@ -102,10 +122,10 @@ class Burner(object): """ self.ClearFeedbackBuf() job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs] - Log("- Submitted job IDs %s" % ", ".join(job_ids)) + Log("Submitted job IDs %s" % ", ".join(job_ids), indent=1) results = [] for jid in job_ids: - Log("- Waiting for job %s" % jid) + Log("Waiting for job %s" % jid, indent=2) results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback)) return results @@ -130,7 +150,7 @@ class Burner(object): help="Disk size (determines disk count)", default="128m", type="string", metavar="") parser.add_option("--disk-growth", dest="disk_growth", help="Disk growth", - default=128, type="string", metavar="") + default="128m", type="string", metavar="") parser.add_option("--mem-size", dest="mem_size", help="Memory size", default=128, type="unit", metavar="") parser.add_option("-v", "--verbose", @@ -151,6 +171,24 @@ class Burner(object): parser.add_option("--no-startstop", dest="do_startstop", help="Skip instance stop/start", action="store_false", default=True) + parser.add_option("--no-reinstall", dest="do_reinstall", + help="Skip instance reinstall", action="store_false", + default=True) + parser.add_option("--no-reboot", dest="do_reboot", + help="Skip instance reboot", action="store_false", + default=True) + parser.add_option("--no-activate-disks", dest="do_activate_disks", + help="Skip disk activation/deactivation", + action="store_false", default=True) + parser.add_option("--no-add-disks", dest="do_addremove_disks", + help="Skip disk addition/removal", + action="store_false", default=True) + parser.add_option("--no-add-nics", dest="do_addremove_nics", + help="Skip NIC addition/removal", + action="store_false", default=True) + parser.add_option("--no-nics", dest="nics", + help="No network interfaces", action="store_const", + const=[], default=[{}]) parser.add_option("--rename", dest="rename", default=None, help="Give one unused instance name which is taken" " to start the renaming sequence", @@ -172,6 +210,16 @@ class Burner(object): dest="parallel", help="Enable parallelization of some operations in" " order to speed burnin or to test granular locking") + parser.add_option("--net-timeout", default=15, type="int", + dest="net_timeout", + help="The instance check network timeout in seconds" + " (defaults to 15 seconds)") + parser.add_option("-C", "--http-check", default=False, action="store_true", + dest="http_check", + help="Enable checking of instance status via http," + " looking for /hostname.txt that should contain the" + " name of the instance") + options, args = parser.parse_args() if len(args) < 1 or options.os is None: @@ -182,26 +230,27 @@ class Burner(object): constants.DT_PLAIN, constants.DT_DRBD8) if options.disk_template not in supported_disk_templates: - Log("Unknown disk template '%s'" % options.disk_template) - sys.exit(1) - - disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")] - disk_growth = [utils.ParseUnit(v) for v in options.disk_growth.split(",")] - if len(disk_growth) != len(disk_size): - Log("Wrong disk sizes/growth combination") - sys.exit(1) + Err("Unknown disk template '%s'" % options.disk_template) + + if options.disk_template == constants.DT_DISKLESS: + disk_size = disk_growth = [] + options.do_addremove_disks = False + else: + disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")] + disk_growth = [utils.ParseUnit(v) + for v in options.disk_growth.split(",")] + if len(disk_growth) != len(disk_size): + Err("Wrong disk sizes/growth combination") if ((disk_size and options.disk_template == constants.DT_DISKLESS) or (not disk_size and options.disk_template != constants.DT_DISKLESS)): - Log("Wrong disk count/disk template combination") - sys.exit(1) + Err("Wrong disk count/disk template combination") self.disk_size = disk_size self.disk_growth = disk_growth self.disk_count = len(disk_size) if options.nodes and options.iallocator: - Log("Give either the nodes option or the iallocator option, not both") - sys.exit(1) + Err("Give either the nodes option or the iallocator option, not both") self.opts = options self.instances = args @@ -211,6 +260,8 @@ class Burner(object): } self.hvp = {} + socket.setdefaulttimeout(options.net_timeout) + def GetState(self): """Read the cluster state from the config.""" if self.opts.nodes: @@ -218,27 +269,24 @@ class Burner(object): else: names = [] try: - op = opcodes.OpQueryNodes(output_fields=["name"], names=names) + op = opcodes.OpQueryNodes(output_fields=["name", "offline"], names=names) result = self.ExecOp(op) except errors.GenericError, err: err_code, msg = cli.FormatError(err) - Log(msg) - sys.exit(err_code) - self.nodes = [data[0] for data in result] + Err(msg, exit_code=err_code) + self.nodes = [data[0] for data in result if not data[1]] result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"], names=[])) if not result: - Log("Can't get the OS list") - sys.exit(1) + Err("Can't get the OS list") # filter non-valid OS-es os_set = [val[0] for val in result if val[1]] if self.opts.os not in os_set: - Log("OS '%s' not found" % self.opts.os) - sys.exit(1) + Err("OS '%s' not found" % self.opts.os) def CreateInstances(self): """Create the given instances. @@ -250,22 +298,25 @@ class Burner(object): self.instances) jobset = [] + Log("Creating instances") for pnode, snode, instance in mytor: + Log("instance %s" % instance, indent=1) if self.opts.iallocator: pnode = snode = None - Log("- Add instance %s (iallocator: %s)" % - (instance, self.opts.iallocator)) + msg = "with iallocator %s" % self.opts.iallocator elif self.opts.disk_template not in constants.DTS_NET_MIRROR: snode = None - Log("- Add instance %s on node %s" % (instance, pnode)) + msg = "on %s" % pnode else: - Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode)) + msg = "on %s, %s" % (pnode, snode) + + Log(msg, indent=2) op = opcodes.OpCreateInstance(instance_name=instance, disks = [ {"size": size} for size in self.disk_size], disk_template=self.opts.disk_template, - nics=[{}], + nics=self.opts.nics, mode=constants.INSTANCE_CREATE, os_type=self.opts.os, pnode=pnode, @@ -291,76 +342,91 @@ class Burner(object): if self.opts.parallel: self.ExecJobSet(jobset) + for instance in self.instances: + self._CheckInstanceAlive(instance) + def GrowDisks(self): """Grow both the os and the swap disks by the requested amount, if any.""" + Log("Growing disks") for instance in self.instances: + Log("instance %s" % instance, indent=1) for idx, growth in enumerate(self.disk_growth): if growth > 0: op = opcodes.OpGrowDisk(instance_name=instance, disk=idx, amount=growth, wait_for_sync=True) - Log("- Increase %s's %s disk by %s MB" % (instance, idx, growth)) + Log("increase disk/%s by %s MB" % (idx, growth), indent=2) self.ExecOp(op) def ReplaceDisks1D8(self): """Replace disks on primary and secondary for drbd8.""" + Log("Replacing disks on the same nodes") for instance in self.instances: + Log("instance %s" % instance, indent=1) for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI: op = opcodes.OpReplaceDisks(instance_name=instance, mode=mode, disks=[i for i in range(self.disk_count)]) - Log("- Replace disks (%s) for instance %s" % (mode, instance)) + Log("run %s" % mode, indent=2) self.ExecOp(op) def ReplaceDisks2(self): """Replace secondary node.""" - mode = constants.REPLACE_DISK_SEC + Log("Changing the secondary node") + mode = constants.REPLACE_DISK_CHG mytor = izip(islice(cycle(self.nodes), 2, None), self.instances) for tnode, instance in mytor: + Log("instance %s" % instance, indent=1) if self.opts.iallocator: tnode = None + msg = "with iallocator %s" % self.opts.iallocator + else: + msg = tnode op = opcodes.OpReplaceDisks(instance_name=instance, mode=mode, remote_node=tnode, iallocator=self.opts.iallocator, disks=[i for i in range(self.disk_count)]) - Log("- Replace secondary (%s) for instance %s" % (mode, instance)) + Log("run %s %s" % (mode, msg), indent=2) self.ExecOp(op) def Failover(self): """Failover the instances.""" - + Log("Failing over instances") for instance in self.instances: + Log("instance %s" % instance, indent=1) op = opcodes.OpFailoverInstance(instance_name=instance, ignore_consistency=False) - Log("- Failover instance %s" % (instance)) self.ExecOp(op) + for instance in self.instances: + self._CheckInstanceAlive(instance) def ImportExport(self): """Export the instance, delete it, and import it back. """ - + Log("Exporting and re-importing instances") mytor = izip(cycle(self.nodes), islice(cycle(self.nodes), 1, None), islice(cycle(self.nodes), 2, None), self.instances) for pnode, snode, enode, instance in mytor: - + Log("instance %s" % instance, indent=1) if self.opts.iallocator: pnode = snode = None - import_log_msg = ("- Import instance %s from node %s (iallocator: %s)" % - (instance, enode, self.opts.iallocator)) + import_log_msg = ("import from %s" + " with iallocator %s" % + (enode, self.opts.iallocator)) elif self.opts.disk_template not in constants.DTS_NET_MIRROR: snode = None - import_log_msg = ("- Import instance %s from node %s to node %s" % - (instance, enode, pnode)) + import_log_msg = ("import from %s to %s" % + (enode, pnode)) else: - import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" % - (instance, enode, pnode, snode)) + import_log_msg = ("import from %s to %s, %s" % + (enode, pnode, snode)) exp_op = opcodes.OpExportInstance(instance_name=instance, target_node=enode, @@ -372,9 +438,10 @@ class Burner(object): full_name = self.ExecOp(nam_op)[0][0] imp_dir = os.path.join(constants.EXPORT_DIR, full_name) imp_op = opcodes.OpCreateInstance(instance_name=instance, - disk_size=self.opts.os_size, - swap_size=self.opts.swap_size, + disks = [ {"size": size} + for size in self.disk_size], disk_template=self.opts.disk_template, + nics=self.opts.nics, mode=constants.INSTANCE_IMPORT, src_node=enode, src_path=imp_dir, @@ -383,9 +450,8 @@ class Burner(object): start=True, ip_check=True, wait_for_sync=True, - mac="auto", file_storage_dir=None, - file_driver=None, + file_driver="loop", iallocator=self.opts.iallocator, beparams=self.bep, hvparams=self.hvp, @@ -393,63 +459,185 @@ class Burner(object): erem_op = opcodes.OpRemoveExport(instance_name=instance) - Log("- Export instance %s to node %s" % (instance, enode)) + Log("export to node %s" % enode, indent=2) self.ExecOp(exp_op) - Log("- Remove instance %s" % (instance)) + Log("remove instance", indent=2) self.ExecOp(rem_op) self.to_rem.remove(instance) - Log(import_log_msg) + Log(import_log_msg, indent=2) self.ExecOp(imp_op) - Log("- Remove export of instance %s" % (instance)) + Log("remove export", indent=2) self.ExecOp(erem_op) self.to_rem.append(instance) + for instance in self.instances: + self._CheckInstanceAlive(instance) + def StopInstance(self, instance): """Stop given instance.""" op = opcodes.OpShutdownInstance(instance_name=instance) - Log("- Shutdown instance %s" % instance) + Log("shutdown", indent=2) self.ExecOp(op) def StartInstance(self, instance): """Start given instance.""" op = opcodes.OpStartupInstance(instance_name=instance, force=False) - Log("- Start instance %s" % instance) + Log("startup", indent=2) self.ExecOp(op) def RenameInstance(self, instance, instance_new): """Rename instance.""" op = opcodes.OpRenameInstance(instance_name=instance, new_name=instance_new) - Log("- Rename instance %s to %s" % (instance, instance_new)) + Log("rename to %s" % instance_new, indent=2) self.ExecOp(op) def StopStart(self): """Stop/start the instances.""" + Log("Stopping and starting instances") for instance in self.instances: + Log("instance %s" % instance, indent=1) self.StopInstance(instance) self.StartInstance(instance) + for instance in self.instances: + self._CheckInstanceAlive(instance) + def Remove(self): """Remove the instances.""" + Log("Removing instances") for instance in self.to_rem: + Log("instance %s" % instance, indent=1) op = opcodes.OpRemoveInstance(instance_name=instance, ignore_failures=True) - Log("- Remove instance %s" % instance) self.ExecOp(op) - def Rename(self): """Rename the instances.""" + Log("Renaming instances") rename = self.opts.rename for instance in self.instances: + Log("instance %s" % instance, indent=1) self.StopInstance(instance) self.RenameInstance(instance, rename) self.StartInstance(rename) + self._CheckInstanceAlive(rename) self.StopInstance(rename) self.RenameInstance(rename, instance) self.StartInstance(instance) + for instance in self.instances: + self._CheckInstanceAlive(instance) + + def Reinstall(self): + """Reinstall the instances.""" + Log("Reinstalling instances") + for instance in self.instances: + Log("instance %s" % instance, indent=1) + self.StopInstance(instance) + op = opcodes.OpReinstallInstance(instance_name=instance) + Log("reinstall without passing the OS", indent=2) + self.ExecOp(op) + op = opcodes.OpReinstallInstance(instance_name=instance, + os_type=self.opts.os) + Log("reinstall specifying the OS", indent=2) + self.ExecOp(op) + self.StartInstance(instance) + for instance in self.instances: + self._CheckInstanceAlive(instance) + + def Reboot(self): + """Reboot the instances.""" + Log("Rebooting instances") + for instance in self.instances: + Log("instance %s" % instance, indent=1) + for reboot_type in constants.REBOOT_TYPES: + op = opcodes.OpRebootInstance(instance_name=instance, + reboot_type=reboot_type, + ignore_secondaries=False) + Log("reboot with type '%s'" % reboot_type, indent=2) + self.ExecOp(op) + self._CheckInstanceAlive(instance) + + def ActivateDisks(self): + """Activate and deactivate disks of the instances.""" + Log("Activating/deactivating disks") + for instance in self.instances: + Log("instance %s" % instance, indent=1) + op_act = opcodes.OpActivateInstanceDisks(instance_name=instance) + op_deact = opcodes.OpDeactivateInstanceDisks(instance_name=instance) + Log("activate disks when online", indent=2) + self.ExecOp(op_act) + self.StopInstance(instance) + Log("activate disks when offline", indent=2) + self.ExecOp(op_act) + Log("deactivate disks (when offline)", indent=2) + self.ExecOp(op_deact) + self.StartInstance(instance) + for instance in self.instances: + self._CheckInstanceAlive(instance) + + def AddRemoveDisks(self): + """Add and remove an extra disk for the instances.""" + Log("Adding and removing disks") + for instance in self.instances: + Log("instance %s" % instance, indent=1) + op_add = opcodes.OpSetInstanceParams(\ + instance_name=instance, + disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})]) + op_rem = opcodes.OpSetInstanceParams(\ + instance_name=instance, disks=[(constants.DDM_REMOVE, {})]) + Log("adding a disk", indent=2) + self.ExecOp(op_add) + self.StopInstance(instance) + Log("removing last disk", indent=2) + self.ExecOp(op_rem) + self.StartInstance(instance) + for instance in self.instances: + self._CheckInstanceAlive(instance) + + def AddRemoveNICs(self): + """Add and remove an extra NIC for the instances.""" + Log("Adding and removing NICs") + for instance in self.instances: + Log("instance %s" % instance, indent=1) + op_add = opcodes.OpSetInstanceParams(\ + instance_name=instance, nics=[(constants.DDM_ADD, {})]) + op_rem = opcodes.OpSetInstanceParams(\ + instance_name=instance, nics=[(constants.DDM_REMOVE, {})]) + Log("adding a NIC", indent=2) + self.ExecOp(op_add) + Log("removing last NIC", indent=2) + self.ExecOp(op_rem) + + def _CheckInstanceAlive(self, instance): + """Check if an instance is alive by doing http checks. + + This will try to retrieve the url on the instance /hostname.txt + and check that it contains the hostname of the instance. In case + we get ECONNREFUSED, we retry up to the net timeout seconds, for + any other error we abort. + + """ + if not self.opts.http_check: + return + try: + for retries in range(self.opts.net_timeout): + try: + url = urllib2.urlopen("http://%s/hostname.txt" % instance) + except urllib2.URLError, err: + if err.args[0][0] == errno.ECONNREFUSED: + time.sleep(1) + continue + raise + except urllib2.URLError, err: + raise InstanceDown(instance, str(err)) + hostname = url.read().strip() + if hostname != instance: + raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" % + (instance, hostname))) + def BurninCluster(self): """Test a cluster intensively. @@ -460,14 +648,13 @@ class Burner(object): opts = self.opts - Log("- Testing global parameters") + Log("Testing global parameters") if (len(self.nodes) == 1 and opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN, constants.DT_FILE)): - Log("When one node is available/selected the disk template must" + Err("When one node is available/selected the disk template must" " be 'diskless', 'file' or 'plain'") - sys.exit(1) has_err = True try: @@ -484,15 +671,32 @@ class Burner(object): if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR: self.Failover() - if opts.do_importexport: + if (opts.do_importexport and + opts.disk_template not in (constants.DT_DISKLESS, + constants.DT_FILE)): self.ImportExport() - if opts.do_startstop: - self.StopStart() + if opts.do_reinstall: + self.Reinstall() + + if opts.do_reboot: + self.Reboot() + + if opts.do_addremove_disks: + self.AddRemoveDisks() + + if opts.do_addremove_nics: + self.AddRemoveNICs() + + if opts.do_activate_disks: + self.ActivateDisks() if opts.rename: self.Rename() + if opts.do_startstop: + self.StopStart() + has_err = False finally: if has_err: