4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 from itertools import izip, islice, cycle
32 from cStringIO import StringIO
34 from ganeti import opcodes
35 from ganeti import constants
36 from ganeti import cli
37 from ganeti import errors
38 from ganeti import utils
39 from ganeti import hypervisor
40 from ganeti import compat
41 from ganeti import pathutils
43 from ganeti.confd import client as confd_client
46 USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
55 #: Disk templates supporting a single node
56 _SINGLE_NODE_DISK_TEMPLATES = compat.UniqueFrozenset([
57 constants.DT_DISKLESS,
60 constants.DT_SHARED_FILE,
65 _SUPPORTED_DISK_TEMPLATES = compat.UniqueFrozenset([
66 constants.DT_DISKLESS,
72 constants.DT_SHARED_FILE,
75 #: Disk templates for which import/export is tested
76 _IMPEXP_DISK_TEMPLATES = (_SUPPORTED_DISK_TEMPLATES - frozenset([
77 constants.DT_DISKLESS,
79 constants.DT_SHARED_FILE,
class InstanceDown(Exception):
  """The checked instance was not up.

  Raised by the HTTP liveness check (and by the custom URL opener's
  error handler) when an instance does not respond as expected.

  """
class BurninFailure(Exception):
  """Failure detected during burning.

  Raised when submitted jobs (or their post-processing) fail, so the
  caller can dump the feedback buffer and abort.

  """
92 """Shows program usage information and exits the program."""
94 print >> sys.stderr, "Usage:"
95 print >> sys.stderr, USAGE
def Log(msg, *args, **kwargs):
  """Simple function that prints out its argument.

  @param msg: the message to print (possibly a format string for *args)
  @keyword indent: indentation level for the message; also selects the
      per-level header character from LOG_HEADERS (default 0)

  """
  indent = kwargs.get("indent", 0)
  sys.stdout.write("%*s%s%s\n" % (2 * indent, "",
                                  LOG_HEADERS.get(indent, " "), msg))
def Err(msg, exit_code=1):
  """Simple error logging that prints to stderr.

  @param msg: the error message to print
  @param exit_code: process exit code (presumably passed to sys.exit
      in the continuation of this function — elided in this view)

  """
  sys.stderr.write(msg + "\n")
120 class SimpleOpener(urllib.FancyURLopener):
121 """A simple url opener"""
122 # pylint: disable=W0221
124 def prompt_user_passwd(self, host, realm, clear_cache=0):
125 """No-interaction version of prompt_user_passwd."""
126 # we follow parent class' API
127 # pylint: disable=W0613
130 def http_error_default(self, url, fp, errcode, errmsg, headers):
131 """Custom error handling"""
132 # make sure sockets are not left in CLOSE_WAIT, this is similar
133 # but with a different exception to the BasicURLOpener class
134 _ = fp.read() # throw away data
136 raise InstanceDown("HTTP error returned: code %s, msg %s" %
141 cli.cli_option("-o", "--os", dest="os", default=None,
142 help="OS to use during burnin",
144 completion_suggest=cli.OPT_COMPL_ONE_OS),
147 cli.cli_option("--disk-size", dest="disk_size",
148 help="Disk size (determines disk count)",
149 default="128m", type="string", metavar="<size,size,...>",
150 completion_suggest=("128M 512M 1G 4G 1G,256M"
151 " 4G,1G,1G 10G").split()),
152 cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth",
153 default="128m", type="string", metavar="<size,size,...>"),
154 cli.cli_option("--mem-size", dest="mem_size", help="Memory size",
155 default=None, type="unit", metavar="<size>",
156 completion_suggest=("128M 256M 512M 1G 4G 8G"
157 " 12G 16G").split()),
158 cli.cli_option("--maxmem-size", dest="maxmem_size", help="Max Memory size",
159 default=256, type="unit", metavar="<size>",
160 completion_suggest=("128M 256M 512M 1G 4G 8G"
161 " 12G 16G").split()),
162 cli.cli_option("--minmem-size", dest="minmem_size", help="Min Memory size",
163 default=128, type="unit", metavar="<size>",
164 completion_suggest=("128M 256M 512M 1G 4G 8G"
165 " 12G 16G").split()),
166 cli.cli_option("--vcpu-count", dest="vcpu_count", help="VCPU count",
167 default=3, type="unit", metavar="<count>",
168 completion_suggest=("1 2 3 4").split()),
173 cli.EARLY_RELEASE_OPT,
174 cli.cli_option("--no-replace1", dest="do_replace1",
175 help="Skip disk replacement with the same secondary",
176 action="store_false", default=True),
177 cli.cli_option("--no-replace2", dest="do_replace2",
178 help="Skip disk replacement with a different secondary",
179 action="store_false", default=True),
180 cli.cli_option("--no-failover", dest="do_failover",
181 help="Skip instance failovers", action="store_false",
183 cli.cli_option("--no-migrate", dest="do_migrate",
184 help="Skip instance live migration",
185 action="store_false", default=True),
186 cli.cli_option("--no-move", dest="do_move",
187 help="Skip instance moves", action="store_false",
189 cli.cli_option("--no-importexport", dest="do_importexport",
190 help="Skip instance export/import", action="store_false",
192 cli.cli_option("--no-startstop", dest="do_startstop",
193 help="Skip instance stop/start", action="store_false",
195 cli.cli_option("--no-reinstall", dest="do_reinstall",
196 help="Skip instance reinstall", action="store_false",
198 cli.cli_option("--no-reboot", dest="do_reboot",
199 help="Skip instance reboot", action="store_false",
201 cli.cli_option("--no-renamesame", dest="do_renamesame",
202 help="Skip instance rename to same name", action="store_false",
204 cli.cli_option("--reboot-types", dest="reboot_types",
205 help="Specify the reboot types", default=None),
206 cli.cli_option("--no-activate-disks", dest="do_activate_disks",
207 help="Skip disk activation/deactivation",
208 action="store_false", default=True),
209 cli.cli_option("--no-add-disks", dest="do_addremove_disks",
210 help="Skip disk addition/removal",
211 action="store_false", default=True),
212 cli.cli_option("--no-add-nics", dest="do_addremove_nics",
213 help="Skip NIC addition/removal",
214 action="store_false", default=True),
215 cli.cli_option("--no-nics", dest="nics",
216 help="No network interfaces", action="store_const",
217 const=[], default=[{}]),
218 cli.cli_option("--no-confd", dest="do_confd_tests",
219 help="Skip confd queries",
220 action="store_false", default=constants.ENABLE_CONFD),
221 cli.cli_option("--rename", dest="rename", default=None,
222 help=("Give one unused instance name which is taken"
223 " to start the renaming sequence"),
224 metavar="<instance_name>"),
225 cli.cli_option("-t", "--disk-template", dest="disk_template",
226 choices=list(_SUPPORTED_DISK_TEMPLATES),
227 default=constants.DT_DRBD8,
228 help=("Disk template (default %s, otherwise one of %s)" %
230 utils.CommaJoin(_SUPPORTED_DISK_TEMPLATES)))),
231 cli.cli_option("-n", "--nodes", dest="nodes", default="",
232 help=("Comma separated list of nodes to perform"
233 " the burnin on (defaults to all nodes)"),
234 completion_suggest=cli.OPT_COMPL_MANY_NODES),
235 cli.cli_option("-I", "--iallocator", dest="iallocator",
236 default=None, type="string",
237 help=("Perform the allocation using an iallocator"
238 " instead of fixed node spread (node restrictions no"
239 " longer apply, therefore -n/--nodes must not be"
241 completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR),
242 cli.cli_option("-p", "--parallel", default=False, action="store_true",
244 help=("Enable parallelization of some operations in"
245 " order to speed burnin or to test granular locking")),
246 cli.cli_option("--net-timeout", default=15, type="int",
248 help=("The instance check network timeout in seconds"
249 " (defaults to 15 seconds)"),
250 completion_suggest="15 60 300 900".split()),
251 cli.cli_option("-C", "--http-check", default=False, action="store_true",
253 help=("Enable checking of instance status via http,"
254 " looking for /hostname.txt that should contain the"
255 " name of the instance")),
256 cli.cli_option("-K", "--keep-instances", default=False,
258 dest="keep_instances",
259 help=("Leave instances on the cluster after burnin,"
260 " for investigation in case of errors or simply"
265 # Mainly used for bash completion
266 ARGUMENTS = [cli.ArgInstance(min=1)]
269 def _DoCheckInstances(fn):
270 """Decorator for checking instances.
273 def wrapper(self, *args, **kwargs):
274 val = fn(self, *args, **kwargs)
275 for instance in self.instances:
276 self._CheckInstanceAlive(instance) # pylint: disable=W0212
283 """Decorator for possible batch operations.
285 Must come after the _DoCheckInstances decorator (if any).
287 @param retry: whether this is a retryable batch, will be
292 def batched(self, *args, **kwargs):
293 self.StartBatch(retry)
294 val = fn(self, *args, **kwargs)
302 class Burner(object):
307 self.url_opener = SimpleOpener()
308 self._feed_buf = StringIO()
314 self.queue_retry = False
315 self.disk_count = self.disk_growth = self.disk_size = None
316 self.hvp = self.bep = None
318 self.cl = cli.GetClient()
  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    # NOTE(review): truncate(0) on a (c)StringIO does not necessarily
    # reset the write position on all Python 2 implementations — confirm
    # that interleaved writes after a clear behave as intended
    self._feed_buf.truncate(0)
325 def GetFeedbackBuf(self):
326 """Return the contents of the buffer."""
327 return self._feed_buf.getvalue()
329 def Feedback(self, msg):
330 """Acumulate feedback in our buffer."""
331 formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
332 self._feed_buf.write(formatted_msg + "\n")
333 if self.opts.verbose:
334 Log(formatted_msg, indent=3)
336 def MaybeRetry(self, retry_count, msg, fn, *args):
337 """Possibly retry a given function execution.
339 @type retry_count: int
340 @param retry_count: retry counter:
341 - 0: non-retryable action
342 - 1: last retry for a retryable action
343 - MAX_RETRIES: original try for a retryable action
345 @param msg: the kind of the operation
347 @param fn: the function to be called
352 if retry_count > 0 and retry_count < MAX_RETRIES:
353 Log("Idempotent %s succeeded after %d retries",
354 msg, MAX_RETRIES - retry_count)
356 except Exception, err: # pylint: disable=W0703
358 Log("Non-idempotent %s failed, aborting", msg)
360 elif retry_count == 1:
361 Log("Idempotent %s repeated failure, aborting", msg)
364 Log("Idempotent %s failed, retry #%d/%d: %s",
365 msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err)
366 self.MaybeRetry(retry_count - 1, msg, fn, *args)
368 def _ExecOp(self, *ops):
369 """Execute one or more opcodes and manage the exec buffer.
371 @return: if only opcode has been passed, we return its result;
372 otherwise we return the list of results
375 job_id = cli.SendJob(ops, cl=self.cl)
376 results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
382 def ExecOp(self, retry, *ops):
383 """Execute one or more opcodes and manage the exec buffer.
385 @return: if only opcode has been passed, we return its result;
386 otherwise we return the list of results
393 cli.SetGenericOpcodeOpts(ops, self.opts)
394 return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops)
396 def ExecOrQueue(self, name, ops, post_process=None):
397 """Execute an opcode and manage the exec buffer."""
398 if self.opts.parallel:
399 cli.SetGenericOpcodeOpts(ops, self.opts)
400 self.queued_ops.append((ops, name, post_process))
402 val = self.ExecOp(self.queue_retry, *ops) # pylint: disable=W0142
403 if post_process is not None:
407 def StartBatch(self, retry):
408 """Start a new batch of jobs.
410 @param retry: whether this is a retryable batch
414 self.queue_retry = retry
416 def CommitQueue(self):
417 """Execute all submitted opcodes in case of parallel burnin"""
418 if not self.opts.parallel or not self.queued_ops:
427 results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
433 def ExecJobSet(self, jobs):
434 """Execute a set of jobs and return once all are done.
436 The method will return the list of results, if all jobs are
437 successful. Otherwise, OpExecError will be raised from within
441 self.ClearFeedbackBuf()
442 jex = cli.JobExecutor(cl=self.cl, feedback_fn=self.Feedback)
443 for ops, name, _ in jobs:
444 jex.QueueJob(name, *ops) # pylint: disable=W0142
446 results = jex.GetResults()
447 except Exception, err: # pylint: disable=W0703
448 Log("Jobs failed: %s", err)
449 raise BurninFailure()
453 for (_, name, post_process), (success, result) in zip(jobs, results):
458 except Exception, err: # pylint: disable=W0703
459 Log("Post process call for job %s failed: %s", name, err)
466 raise BurninFailure()
470 def ParseOptions(self):
471 """Parses the command line options.
473 In case of command line errors, it will show the usage and exit the
477 parser = optparse.OptionParser(usage="\n%s" % USAGE,
478 version=("%%prog (ganeti) %s" %
479 constants.RELEASE_VERSION),
482 options, args = parser.parse_args()
483 if len(args) < 1 or options.os is None:
487 options.maxmem_size = options.mem_size
488 options.minmem_size = options.mem_size
489 elif options.minmem_size > options.maxmem_size:
490 Err("Maximum memory lower than minimum memory")
492 if options.disk_template not in _SUPPORTED_DISK_TEMPLATES:
493 Err("Unknown or unsupported disk template '%s'" % options.disk_template)
495 if options.disk_template == constants.DT_DISKLESS:
496 disk_size = disk_growth = []
497 options.do_addremove_disks = False
499 disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
500 disk_growth = [utils.ParseUnit(v)
501 for v in options.disk_growth.split(",")]
502 if len(disk_growth) != len(disk_size):
503 Err("Wrong disk sizes/growth combination")
504 if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
505 (not disk_size and options.disk_template != constants.DT_DISKLESS)):
506 Err("Wrong disk count/disk template combination")
508 self.disk_size = disk_size
509 self.disk_growth = disk_growth
510 self.disk_count = len(disk_size)
512 if options.nodes and options.iallocator:
513 Err("Give either the nodes option or the iallocator option, not both")
515 if options.http_check and not options.name_check:
516 Err("Can't enable HTTP checks without name checks")
519 self.instances = args
521 constants.BE_MINMEM: options.minmem_size,
522 constants.BE_MAXMEM: options.maxmem_size,
523 constants.BE_VCPUS: options.vcpu_count,
526 self.hypervisor = None
528 if options.hypervisor:
529 self.hypervisor, self.hvp = options.hypervisor
531 if options.reboot_types is None:
532 options.reboot_types = constants.REBOOT_TYPES
534 options.reboot_types = options.reboot_types.split(",")
535 rt_diff = set(options.reboot_types).difference(constants.REBOOT_TYPES)
537 Err("Invalid reboot types specified: %s" % utils.CommaJoin(rt_diff))
539 socket.setdefaulttimeout(options.net_timeout)
542 """Read the cluster state from the master daemon."""
544 names = self.opts.nodes.split(",")
548 op = opcodes.OpNodeQuery(output_fields=["name", "offline", "drained"],
549 names=names, use_locking=True)
550 result = self.ExecOp(True, op)
551 except errors.GenericError, err:
552 err_code, msg = cli.FormatError(err)
553 Err(msg, exit_code=err_code)
554 self.nodes = [data[0] for data in result if not (data[1] or data[2])]
556 op_diagnose = opcodes.OpOsDiagnose(output_fields=["name",
560 result = self.ExecOp(True, op_diagnose)
563 Err("Can't get the OS list")
566 for (name, variants, _) in result:
567 if self.opts.os in cli.CalculateOSNames(name, variants):
572 Err("OS '%s' not found" % self.opts.os)
574 cluster_info = self.cl.QueryClusterInfo()
575 self.cluster_info = cluster_info
576 if not self.cluster_info:
577 Err("Can't get cluster info")
579 default_nic_params = self.cluster_info["nicparams"][constants.PP_DEFAULT]
580 self.cluster_default_nicparams = default_nic_params
581 if self.hypervisor is None:
582 self.hypervisor = self.cluster_info["default_hypervisor"]
583 self.hv_can_migrate = \
584 hypervisor.GetHypervisorClass(self.hypervisor).CAN_MIGRATE
588 def BurnCreateInstances(self):
589 """Create the given instances.
593 mytor = izip(cycle(self.nodes),
594 islice(cycle(self.nodes), 1, None),
597 Log("Creating instances")
598 for pnode, snode, instance in mytor:
599 Log("instance %s", instance, indent=1)
600 if self.opts.iallocator:
602 msg = "with iallocator %s" % self.opts.iallocator
603 elif self.opts.disk_template not in constants.DTS_INT_MIRROR:
605 msg = "on %s" % pnode
607 msg = "on %s, %s" % (pnode, snode)
611 op = opcodes.OpInstanceCreate(instance_name=instance,
612 disks=[{"size": size}
613 for size in self.disk_size],
614 disk_template=self.opts.disk_template,
616 mode=constants.INSTANCE_CREATE,
617 os_type=self.opts.os,
621 ip_check=self.opts.ip_check,
622 name_check=self.opts.name_check,
625 file_storage_dir=None,
626 iallocator=self.opts.iallocator,
629 hypervisor=self.hypervisor,
630 osparams=self.opts.osparams,
632 remove_instance = lambda name: lambda: self.to_rem.append(name)
633 self.ExecOrQueue(instance, [op], post_process=remove_instance(instance))
636 def BurnModifyRuntimeMemory(self):
637 """Alter the runtime memory."""
638 Log("Setting instance runtime memory")
639 for instance in self.instances:
640 Log("instance %s", instance, indent=1)
641 tgt_mem = self.bep[constants.BE_MINMEM]
642 op = opcodes.OpInstanceSetParams(instance_name=instance,
644 Log("Set memory to %s MB", tgt_mem, indent=2)
645 self.ExecOrQueue(instance, [op])
648 def BurnGrowDisks(self):
649 """Grow both the os and the swap disks by the requested amount, if any."""
651 for instance in self.instances:
652 Log("instance %s", instance, indent=1)
653 for idx, growth in enumerate(self.disk_growth):
655 op = opcodes.OpInstanceGrowDisk(instance_name=instance, disk=idx,
656 amount=growth, wait_for_sync=True)
657 Log("increase disk/%s by %s MB", idx, growth, indent=2)
658 self.ExecOrQueue(instance, [op])
661 def BurnReplaceDisks1D8(self):
662 """Replace disks on primary and secondary for drbd8."""
663 Log("Replacing disks on the same nodes")
664 early_release = self.opts.early_release
665 for instance in self.instances:
666 Log("instance %s", instance, indent=1)
668 for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
669 op = opcodes.OpInstanceReplaceDisks(instance_name=instance,
671 disks=list(range(self.disk_count)),
672 early_release=early_release)
673 Log("run %s", mode, indent=2)
675 self.ExecOrQueue(instance, ops)
678 def BurnReplaceDisks2(self):
679 """Replace secondary node."""
680 Log("Changing the secondary node")
681 mode = constants.REPLACE_DISK_CHG
683 mytor = izip(islice(cycle(self.nodes), 2, None),
685 for tnode, instance in mytor:
686 Log("instance %s", instance, indent=1)
687 if self.opts.iallocator:
689 msg = "with iallocator %s" % self.opts.iallocator
692 op = opcodes.OpInstanceReplaceDisks(instance_name=instance,
695 iallocator=self.opts.iallocator,
697 early_release=self.opts.early_release)
698 Log("run %s %s", mode, msg, indent=2)
699 self.ExecOrQueue(instance, [op])
703 def BurnFailover(self):
704 """Failover the instances."""
705 Log("Failing over instances")
706 for instance in self.instances:
707 Log("instance %s", instance, indent=1)
708 op = opcodes.OpInstanceFailover(instance_name=instance,
709 ignore_consistency=False)
710 self.ExecOrQueue(instance, [op])
715 """Move the instances."""
716 Log("Moving instances")
717 mytor = izip(islice(cycle(self.nodes), 1, None),
719 for tnode, instance in mytor:
720 Log("instance %s", instance, indent=1)
721 op = opcodes.OpInstanceMove(instance_name=instance,
723 self.ExecOrQueue(instance, [op])
726 def BurnMigrate(self):
727 """Migrate the instances."""
728 Log("Migrating instances")
729 for instance in self.instances:
730 Log("instance %s", instance, indent=1)
731 op1 = opcodes.OpInstanceMigrate(instance_name=instance, mode=None,
734 op2 = opcodes.OpInstanceMigrate(instance_name=instance, mode=None,
736 Log("migration and migration cleanup", indent=2)
737 self.ExecOrQueue(instance, [op1, op2])
741 def BurnImportExport(self):
742 """Export the instance, delete it, and import it back.
745 Log("Exporting and re-importing instances")
746 mytor = izip(cycle(self.nodes),
747 islice(cycle(self.nodes), 1, None),
748 islice(cycle(self.nodes), 2, None),
751 for pnode, snode, enode, instance in mytor:
752 Log("instance %s", instance, indent=1)
753 # read the full name of the instance
754 nam_op = opcodes.OpInstanceQuery(output_fields=["name"],
755 names=[instance], use_locking=True)
756 full_name = self.ExecOp(False, nam_op)[0][0]
758 if self.opts.iallocator:
760 import_log_msg = ("import from %s"
761 " with iallocator %s" %
762 (enode, self.opts.iallocator))
763 elif self.opts.disk_template not in constants.DTS_INT_MIRROR:
765 import_log_msg = ("import from %s to %s" %
768 import_log_msg = ("import from %s to %s, %s" %
769 (enode, pnode, snode))
771 exp_op = opcodes.OpBackupExport(instance_name=instance,
773 mode=constants.EXPORT_MODE_LOCAL,
775 rem_op = opcodes.OpInstanceRemove(instance_name=instance,
776 ignore_failures=True)
777 imp_dir = utils.PathJoin(pathutils.EXPORT_DIR, full_name)
778 imp_op = opcodes.OpInstanceCreate(instance_name=instance,
779 disks=[{"size": size}
780 for size in self.disk_size],
781 disk_template=self.opts.disk_template,
783 mode=constants.INSTANCE_IMPORT,
789 ip_check=self.opts.ip_check,
790 name_check=self.opts.name_check,
792 file_storage_dir=None,
794 iallocator=self.opts.iallocator,
797 osparams=self.opts.osparams,
800 erem_op = opcodes.OpBackupRemove(instance_name=instance)
802 Log("export to node %s", enode, indent=2)
803 Log("remove instance", indent=2)
804 Log(import_log_msg, indent=2)
805 Log("remove export", indent=2)
806 self.ExecOrQueue(instance, [exp_op, rem_op, imp_op, erem_op])
809 def StopInstanceOp(instance):
810 """Stop given instance."""
811 return opcodes.OpInstanceShutdown(instance_name=instance)
814 def StartInstanceOp(instance):
815 """Start given instance."""
816 return opcodes.OpInstanceStartup(instance_name=instance, force=False)
819 def RenameInstanceOp(instance, instance_new):
820 """Rename instance."""
821 return opcodes.OpInstanceRename(instance_name=instance,
822 new_name=instance_new)
826 def BurnStopStart(self):
827 """Stop/start the instances."""
828 Log("Stopping and starting instances")
829 for instance in self.instances:
830 Log("instance %s", instance, indent=1)
831 op1 = self.StopInstanceOp(instance)
832 op2 = self.StartInstanceOp(instance)
833 self.ExecOrQueue(instance, [op1, op2])
836 def BurnRemove(self):
837 """Remove the instances."""
838 Log("Removing instances")
839 for instance in self.to_rem:
840 Log("instance %s", instance, indent=1)
841 op = opcodes.OpInstanceRemove(instance_name=instance,
842 ignore_failures=True)
843 self.ExecOrQueue(instance, [op])
  def BurnRename(self):
    """Rename the instances.

    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
    Log("Renaming instances")
    rename = self.opts.rename
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      op_stop1 = self.StopInstanceOp(instance)
      op_stop2 = self.StopInstanceOp(rename)
      op_rename1 = self.RenameInstanceOp(instance, rename)
      op_rename2 = self.RenameInstanceOp(rename, instance)
      op_start1 = self.StartInstanceOp(rename)
      op_start2 = self.StartInstanceOp(instance)
      # rename to the spare name, verify liveness, then rename back
      self.ExecOp(False, op_stop1, op_rename1, op_start1)
      self._CheckInstanceAlive(rename)
      self.ExecOp(False, op_stop2, op_rename2, op_start2)
      self._CheckInstanceAlive(instance)
869 def BurnReinstall(self):
870 """Reinstall the instances."""
871 Log("Reinstalling instances")
872 for instance in self.instances:
873 Log("instance %s", instance, indent=1)
874 op1 = self.StopInstanceOp(instance)
875 op2 = opcodes.OpInstanceReinstall(instance_name=instance)
876 Log("reinstall without passing the OS", indent=2)
877 op3 = opcodes.OpInstanceReinstall(instance_name=instance,
878 os_type=self.opts.os)
879 Log("reinstall specifying the OS", indent=2)
880 op4 = self.StartInstanceOp(instance)
881 self.ExecOrQueue(instance, [op1, op2, op3, op4])
885 def BurnReboot(self):
886 """Reboot the instances."""
887 Log("Rebooting instances")
888 for instance in self.instances:
889 Log("instance %s", instance, indent=1)
891 for reboot_type in self.opts.reboot_types:
892 op = opcodes.OpInstanceReboot(instance_name=instance,
893 reboot_type=reboot_type,
894 ignore_secondaries=False)
895 Log("reboot with type '%s'", reboot_type, indent=2)
897 self.ExecOrQueue(instance, ops)
901 def BurnRenameSame(self):
902 """Rename the instances to their own name."""
903 Log("Renaming the instances to their own name")
904 for instance in self.instances:
905 Log("instance %s", instance, indent=1)
906 op1 = self.StopInstanceOp(instance)
907 op2 = self.RenameInstanceOp(instance, instance)
908 Log("rename to the same name", indent=2)
909 op4 = self.StartInstanceOp(instance)
910 self.ExecOrQueue(instance, [op1, op2, op4])
914 def BurnActivateDisks(self):
915 """Activate and deactivate disks of the instances."""
916 Log("Activating/deactivating disks")
917 for instance in self.instances:
918 Log("instance %s", instance, indent=1)
919 op_start = self.StartInstanceOp(instance)
920 op_act = opcodes.OpInstanceActivateDisks(instance_name=instance)
921 op_deact = opcodes.OpInstanceDeactivateDisks(instance_name=instance)
922 op_stop = self.StopInstanceOp(instance)
923 Log("activate disks when online", indent=2)
924 Log("activate disks when offline", indent=2)
925 Log("deactivate disks (when offline)", indent=2)
926 self.ExecOrQueue(instance, [op_act, op_stop, op_act, op_deact, op_start])
930 def BurnAddRemoveDisks(self):
931 """Add and remove an extra disk for the instances."""
932 Log("Adding and removing disks")
933 for instance in self.instances:
934 Log("instance %s", instance, indent=1)
935 op_add = opcodes.OpInstanceSetParams(
936 instance_name=instance,
937 disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
938 op_rem = opcodes.OpInstanceSetParams(
939 instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
940 op_stop = self.StopInstanceOp(instance)
941 op_start = self.StartInstanceOp(instance)
942 Log("adding a disk", indent=2)
943 Log("removing last disk", indent=2)
944 self.ExecOrQueue(instance, [op_add, op_stop, op_rem, op_start])
947 def BurnAddRemoveNICs(self):
948 """Add, change and remove an extra NIC for the instances."""
949 Log("Adding and removing NICs")
950 for instance in self.instances:
951 Log("instance %s", instance, indent=1)
952 op_add = opcodes.OpInstanceSetParams(
953 instance_name=instance, nics=[(constants.DDM_ADD, {})])
954 op_chg = opcodes.OpInstanceSetParams(
955 instance_name=instance, nics=[(constants.DDM_MODIFY,
956 -1, {"mac": constants.VALUE_GENERATE})])
957 op_rem = opcodes.OpInstanceSetParams(
958 instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
959 Log("adding a NIC", indent=2)
960 Log("changing a NIC", indent=2)
961 Log("removing last NIC", indent=2)
962 self.ExecOrQueue(instance, [op_add, op_chg, op_rem])
964 def ConfdCallback(self, reply):
965 """Callback for confd queries"""
966 if reply.type == confd_client.UPCALL_REPLY:
967 if reply.server_reply.status != constants.CONFD_REPL_STATUS_OK:
968 Err("Query %s gave non-ok status %s: %s" % (reply.orig_request,
969 reply.server_reply.status,
971 if reply.orig_request.type == constants.CONFD_REQ_PING:
972 Log("Ping: OK", indent=1)
973 elif reply.orig_request.type == constants.CONFD_REQ_CLUSTER_MASTER:
974 if reply.server_reply.answer == self.cluster_info["master"]:
975 Log("Master: OK", indent=1)
977 Err("Master: wrong: %s" % reply.server_reply.answer)
978 elif reply.orig_request.type == constants.CONFD_REQ_NODE_ROLE_BYNAME:
979 if reply.server_reply.answer == constants.CONFD_NODE_ROLE_MASTER:
980 Log("Node role for master: OK", indent=1)
982 Err("Node role for master: wrong: %s" % reply.server_reply.answer)
  def DoConfdRequestReply(self, req):
    """Send a confd request and wait for all expected replies.

    Registers the request's salt with the counting callback, sends the
    request, then polls the client until the callback reports that every
    expected reply has been received.

    @param req: the confd client request to send (must have an C{rsalt})

    """
    self.confd_counting_callback.RegisterQuery(req.rsalt)
    # NOTE: "async" is valid here only under Python 2 (keyword in Py3)
    self.confd_client.SendRequest(req, async=False)
    while not self.confd_counting_callback.AllAnswered():
      if not self.confd_client.ReceiveReply():
        Err("Did not receive all expected confd replies")
993 """Run confd queries for our instances.
995 The following confd queries are tested:
996 - CONFD_REQ_PING: simple ping
997 - CONFD_REQ_CLUSTER_MASTER: cluster master
998 - CONFD_REQ_NODE_ROLE_BYNAME: node role, for the master
1001 Log("Checking confd results")
1003 filter_callback = confd_client.ConfdFilterCallback(self.ConfdCallback)
1004 counting_callback = confd_client.ConfdCountingCallback(filter_callback)
1005 self.confd_counting_callback = counting_callback
1007 self.confd_client = confd_client.GetConfdClient(counting_callback)
1009 req = confd_client.ConfdClientRequest(type=constants.CONFD_REQ_PING)
1010 self.DoConfdRequestReply(req)
1012 req = confd_client.ConfdClientRequest(
1013 type=constants.CONFD_REQ_CLUSTER_MASTER)
1014 self.DoConfdRequestReply(req)
1016 req = confd_client.ConfdClientRequest(
1017 type=constants.CONFD_REQ_NODE_ROLE_BYNAME,
1018 query=self.cluster_info["master"])
1019 self.DoConfdRequestReply(req)
1021 def _CheckInstanceAlive(self, instance):
1022 """Check if an instance is alive by doing http checks.
1024 This will try to retrieve the url on the instance /hostname.txt
1025 and check that it contains the hostname of the instance. In case
1026 we get ECONNREFUSED, we retry up to the net timeout seconds, for
1027 any other error we abort.
1030 if not self.opts.http_check:
1032 end_time = time.time() + self.opts.net_timeout
1034 while time.time() < end_time and url is None:
1036 url = self.url_opener.open("http://%s/hostname.txt" % instance)
1038 # here we can have connection refused, no route to host, etc.
1041 raise InstanceDown(instance, "Cannot contact instance")
1042 hostname = url.read().strip()
1044 if hostname != instance:
1045 raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
1046 (instance, hostname)))
1048 def BurninCluster(self):
1049 """Test a cluster intensively.
1051 This will create instances and then start/stop/failover them.
1052 It is safe for existing instances but could impact performance.
1058 Log("Testing global parameters")
1060 if (len(self.nodes) == 1 and
1061 opts.disk_template not in _SINGLE_NODE_DISK_TEMPLATES):
1062 Err("When one node is available/selected the disk template must"
1063 " be one of %s" % utils.CommaJoin(_SINGLE_NODE_DISK_TEMPLATES))
1065 if opts.do_confd_tests and not constants.ENABLE_CONFD:
1066 Err("You selected confd tests but confd was disabled at configure time")
1070 self.BurnCreateInstances()
1072 if self.bep[constants.BE_MINMEM] < self.bep[constants.BE_MAXMEM]:
1073 self.BurnModifyRuntimeMemory()
1075 if opts.do_replace1 and opts.disk_template in constants.DTS_INT_MIRROR:
1076 self.BurnReplaceDisks1D8()
1077 if (opts.do_replace2 and len(self.nodes) > 2 and
1078 opts.disk_template in constants.DTS_INT_MIRROR):
1079 self.BurnReplaceDisks2()
1081 if (opts.disk_template in constants.DTS_GROWABLE and
1082 compat.any(n > 0 for n in self.disk_growth)):
1083 self.BurnGrowDisks()
1085 if opts.do_failover and opts.disk_template in constants.DTS_MIRRORED:
1089 if opts.disk_template not in constants.DTS_MIRRORED:
1090 Log("Skipping migration (disk template %s does not support it)",
1092 elif not self.hv_can_migrate:
1093 Log("Skipping migration (hypervisor %s does not support it)",
1098 if (opts.do_move and len(self.nodes) > 1 and
1099 opts.disk_template in [constants.DT_PLAIN, constants.DT_FILE]):
1102 if (opts.do_importexport and
1103 opts.disk_template in _IMPEXP_DISK_TEMPLATES):
1104 self.BurnImportExport()
1106 if opts.do_reinstall:
1107 self.BurnReinstall()
1112 if opts.do_renamesame:
1113 self.BurnRenameSame()
1115 if opts.do_addremove_disks:
1116 self.BurnAddRemoveDisks()
1118 default_nic_mode = self.cluster_default_nicparams[constants.NIC_MODE]
1119 # Don't add/remove nics in routed mode, as we would need an ip to add
1121 if opts.do_addremove_nics:
1122 if default_nic_mode == constants.NIC_MODE_BRIDGED:
1123 self.BurnAddRemoveNICs()
1125 Log("Skipping nic add/remove as the cluster is not in bridged mode")
1127 if opts.do_activate_disks:
1128 self.BurnActivateDisks()
1133 if opts.do_confd_tests:
1136 if opts.do_startstop:
1137 self.BurnStopStart()
1142 Log("Error detected: opcode buffer follows:\n\n")
1143 Log(self.GetFeedbackBuf())
1145 if not self.opts.keep_instances:
1148 except Exception, err: # pylint: disable=W0703
1149 if has_err: # already detected errors, so errors in removal
1150 # are quite expected
1151 Log("Note: error detected during instance remove: %s", err)
1152 else: # non-expected error
1155 return constants.EXIT_SUCCESS
1162 utils.SetupLogging(pathutils.LOG_BURNIN, sys.argv[0],
1163 debug=False, stderr_logging=True)
1165 return Burner().BurninCluster()