# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

import sys
import optparse
import time
import socket
import urllib

from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import constants
from ganeti import cli
from ganeti import errors
from ganeti import utils
from ganeti import hypervisor
from ganeti import compat

from ganeti.confd import client as confd_client


USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")

# Retry budget for idempotent operations (see Burner.MaybeRetry below)
MAX_RETRIES = 3

# Log line prefixes per indent level (see Log below)
LOG_HEADERS = {
  0: "- ",
  1: "* ",
  2: "",
  }
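
# Example invocation (illustrative only; the OS and instance names are
# placeholders):
#   burnin -o debian-etch -p --disk-size=1G,512M inst1.example.com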


class InstanceDown(Exception):
  """The checked instance was not up"""


class BurninFailure(Exception):
  """Failure detected during burning"""
64 """Shows program usage information and exits the program."""
66 print >> sys.stderr, "Usage:"
67 print >> sys.stderr, USAGE


def Log(msg, *args, **kwargs):
  """Simple printf-style function that prints its arguments.

  """
  if args:
    msg = msg % args
  indent = kwargs.get("indent", 0)
  sys.stdout.write("%*s%s%s\n" % (2 * indent, "",
                                  LOG_HEADERS.get(indent, "  "), msg))
  sys.stdout.flush()
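
# Output sketch at different indent levels, assuming the LOG_HEADERS values
# defined above:
#   Log("Creating instances")                -> "- Creating instances"
#   Log("instance %s", "inst1", indent=1)    -> "  * instance inst1"
#   Log("run %s", "replace-disks", indent=2) -> "    run replace-disks"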


def Err(msg, exit_code=1):
  """Simple error logging that prints to stderr.

  """
  sys.stderr.write(msg + "\n")
  sys.stderr.flush()
  sys.exit(exit_code)


class SimpleOpener(urllib.FancyURLopener):
  """A simple URL opener"""
  # pylint: disable=W0221

  def prompt_user_passwd(self, host, realm, clear_cache=0):
    """No-interaction version of prompt_user_passwd."""
    # we follow the parent class' API
    # pylint: disable=W0613
    return None, None

  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Custom error handling"""
    # make sure sockets are not left in CLOSE_WAIT, this is similar
    # but with a different exception to the BasicURLOpener class
    _ = fp.read()  # throw away data
    fp.close()
    raise InstanceDown("HTTP error returned: code %s, msg %s" %
                       (errcode, errmsg))
113 cli.cli_option("-o", "--os", dest="os", default=None,
114 help="OS to use during burnin",
116 completion_suggest=cli.OPT_COMPL_ONE_OS),
119 cli.cli_option("--disk-size", dest="disk_size",
120 help="Disk size (determines disk count)",
121 default="128m", type="string", metavar="<size,size,...>",
122 completion_suggest=("128M 512M 1G 4G 1G,256M"
123 " 4G,1G,1G 10G").split()),
124 cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth",
125 default="128m", type="string", metavar="<size,size,...>"),
126 cli.cli_option("--mem-size", dest="mem_size", help="Memory size",
127 default=None, type="unit", metavar="<size>",
128 completion_suggest=("128M 256M 512M 1G 4G 8G"
129 " 12G 16G").split()),
130 cli.cli_option("--maxmem-size", dest="maxmem_size", help="Max Memory size",
131 default=256, type="unit", metavar="<size>",
132 completion_suggest=("128M 256M 512M 1G 4G 8G"
133 " 12G 16G").split()),
134 cli.cli_option("--minmem-size", dest="minmem_size", help="Min Memory size",
135 default=128, type="unit", metavar="<size>",
136 completion_suggest=("128M 256M 512M 1G 4G 8G"
137 " 12G 16G").split()),
138 cli.cli_option("--vcpu-count", dest="vcpu_count", help="VCPU count",
139 default=3, type="unit", metavar="<count>",
140 completion_suggest=("1 2 3 4").split()),
145 cli.EARLY_RELEASE_OPT,
146 cli.cli_option("--no-replace1", dest="do_replace1",
147 help="Skip disk replacement with the same secondary",
148 action="store_false", default=True),
149 cli.cli_option("--no-replace2", dest="do_replace2",
150 help="Skip disk replacement with a different secondary",
151 action="store_false", default=True),
152 cli.cli_option("--no-failover", dest="do_failover",
153 help="Skip instance failovers", action="store_false",
155 cli.cli_option("--no-migrate", dest="do_migrate",
156 help="Skip instance live migration",
157 action="store_false", default=True),
158 cli.cli_option("--no-move", dest="do_move",
159 help="Skip instance moves", action="store_false",
161 cli.cli_option("--no-importexport", dest="do_importexport",
162 help="Skip instance export/import", action="store_false",
164 cli.cli_option("--no-startstop", dest="do_startstop",
165 help="Skip instance stop/start", action="store_false",
167 cli.cli_option("--no-reinstall", dest="do_reinstall",
168 help="Skip instance reinstall", action="store_false",
170 cli.cli_option("--no-reboot", dest="do_reboot",
171 help="Skip instance reboot", action="store_false",
173 cli.cli_option("--reboot-types", dest="reboot_types",
174 help="Specify the reboot types", default=None),
175 cli.cli_option("--no-activate-disks", dest="do_activate_disks",
176 help="Skip disk activation/deactivation",
177 action="store_false", default=True),
178 cli.cli_option("--no-add-disks", dest="do_addremove_disks",
179 help="Skip disk addition/removal",
180 action="store_false", default=True),
181 cli.cli_option("--no-add-nics", dest="do_addremove_nics",
182 help="Skip NIC addition/removal",
183 action="store_false", default=True),
184 cli.cli_option("--no-nics", dest="nics",
185 help="No network interfaces", action="store_const",
186 const=[], default=[{}]),
187 cli.cli_option("--no-confd", dest="do_confd_tests",
188 help="Skip confd queries",
189 action="store_false", default=constants.ENABLE_CONFD),
190 cli.cli_option("--rename", dest="rename", default=None,
191 help=("Give one unused instance name which is taken"
192 " to start the renaming sequence"),
193 metavar="<instance_name>"),
194 cli.cli_option("-t", "--disk-template", dest="disk_template",
195 choices=list(constants.DISK_TEMPLATES),
196 default=constants.DT_DRBD8,
197 help="Disk template (diskless, file, plain, sharedfile"
199 cli.cli_option("-n", "--nodes", dest="nodes", default="",
200 help=("Comma separated list of nodes to perform"
201 " the burnin on (defaults to all nodes)"),
202 completion_suggest=cli.OPT_COMPL_MANY_NODES),
203 cli.cli_option("-I", "--iallocator", dest="iallocator",
204 default=None, type="string",
205 help=("Perform the allocation using an iallocator"
206 " instead of fixed node spread (node restrictions no"
207 " longer apply, therefore -n/--nodes must not be"
209 completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR),
210 cli.cli_option("-p", "--parallel", default=False, action="store_true",
212 help=("Enable parallelization of some operations in"
213 " order to speed burnin or to test granular locking")),
214 cli.cli_option("--net-timeout", default=15, type="int",
216 help=("The instance check network timeout in seconds"
217 " (defaults to 15 seconds)"),
218 completion_suggest="15 60 300 900".split()),
219 cli.cli_option("-C", "--http-check", default=False, action="store_true",
221 help=("Enable checking of instance status via http,"
222 " looking for /hostname.txt that should contain the"
223 " name of the instance")),
224 cli.cli_option("-K", "--keep-instances", default=False,
226 dest="keep_instances",
227 help=("Leave instances on the cluster after burnin,"
228 " for investigation in case of errors or simply"


# Mainly used for bash completion
ARGUMENTS = [cli.ArgInstance(min=1)]


def _DoCheckInstances(fn):
  """Decorator for checking instances.

  """
  def wrapper(self, *args, **kwargs):
    val = fn(self, *args, **kwargs)
    for instance in self.instances:
      self._CheckInstanceAlive(instance)  # pylint: disable=W0212
    return val

  return wrapper
250 """Decorator for possible batch operations.
252 Must come after the _DoCheckInstances decorator (if any).
254 @param retry: whether this is a retryable batch, will be
259 def batched(self, *args, **kwargs):
260 self.StartBatch(retry)
261 val = fn(self, *args, **kwargs)
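
# Burner methods below stack these decorators with _DoCheckInstances
# outermost, per _DoBatch's docstring, e.g.:
#
#   @_DoCheckInstances
#   @_DoBatch(False)
#   def BurnCreateInstances(self):
#     ...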


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    self.url_opener = SimpleOpener()
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.queued_ops = []
    self.opts = None
    self.queue_retry = False
    self.disk_count = self.disk_growth = self.disk_size = None
    self.hvp = self.bep = None
    self.ParseOptions()
    self.cl = cli.GetClient()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
    formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
    self._feed_buf.write(formatted_msg + "\n")
    if self.opts.verbose:
      Log(formatted_msg, indent=3)

  def MaybeRetry(self, retry_count, msg, fn, *args):
    """Possibly retry a given function execution.

    @type retry_count: int
    @param retry_count: retry counter:
        - 0: non-retryable action
        - 1: last retry for a retryable action
        - MAX_RETRIES: original try for a retryable action
    @type msg: str
    @param msg: the kind of the operation
    @type fn: callable
    @param fn: the function to be called

    """
    try:
      val = fn(*args)
      if retry_count > 0 and retry_count < MAX_RETRIES:
        Log("Idempotent %s succeeded after %d retries",
            msg, MAX_RETRIES - retry_count)
      return val
    except Exception, err:  # pylint: disable=W0703
      if retry_count == 0:
        Log("Non-idempotent %s failed, aborting", msg)
        raise
      elif retry_count == 1:
        Log("Idempotent %s repeated failure, aborting", msg)
        raise
      else:
        Log("Idempotent %s failed, retry #%d/%d: %s",
            msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err)
        # propagate the retried call's result to the caller
        return self.MaybeRetry(retry_count - 1, msg, fn, *args)
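
  # Retry accounting sketch, assuming MAX_RETRIES = 3: a retryable call
  # starts at retry_count=3 and recurses with 2, then 1; a failure at
  # retry_count=1 (or at 0, for non-retryable calls) aborts, while a
  # success after a retry logs how many retries were needed.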

  def _ExecOp(self, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    job_id = cli.SendJob(ops, cl=self.cl)
    results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
    if len(ops) == 1:
      return results[0]
    else:
      return results

  def ExecOp(self, retry, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    if retry:
      rval = MAX_RETRIES
    else:
      rval = 0
    cli.SetGenericOpcodeOpts(ops, self.opts)
    return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops)

  def ExecOrQueue(self, name, ops, post_process=None):
    """Execute an opcode and manage the exec buffer."""
    if self.opts.parallel:
      cli.SetGenericOpcodeOpts(ops, self.opts)
      self.queued_ops.append((ops, name, post_process))
    else:
      val = self.ExecOp(self.queue_retry, *ops)  # pylint: disable=W0142
      if post_process is not None:
        post_process()
      return val

  def StartBatch(self, retry):
    """Start a new batch of jobs.

    @param retry: whether this is a retryable batch

    """
    self.queued_ops = []
    self.queue_retry = retry

  def CommitQueue(self):
    """Execute all submitted opcodes in case of parallel burnin"""
    if not self.opts.parallel or not self.queued_ops:
      return

    if self.queue_retry:
      rval = MAX_RETRIES
    else:
      rval = 0

    try:
      results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
                                self.queued_ops)
    finally:
      self.queued_ops = []

    return results

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
    jex = cli.JobExecutor(cl=self.cl, feedback_fn=self.Feedback)
    for ops, name, _ in jobs:
      jex.QueueJob(name, *ops)  # pylint: disable=W0142
    try:
      results = jex.GetResults()
    except Exception, err:  # pylint: disable=W0703
      Log("Jobs failed: %s", err)
      raise BurninFailure()

    fail = False
    val = []
    for (_, name, post_process), (success, result) in zip(jobs, results):
      if success:
        if post_process:
          try:
            post_process()
          except Exception, err:  # pylint: disable=W0703
            Log("Post process call for job %s failed: %s", name, err)
            fail = True
        val.append(result)
      else:
        fail = True

    if fail:
      raise BurninFailure()

    return val

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version=("%%prog (ganeti) %s" %
                                            constants.RELEASE_VERSION),
                                   option_list=OPTIONS)

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    if options.mem_size:
      options.maxmem_size = options.mem_size
      options.minmem_size = options.mem_size
    elif options.minmem_size > options.maxmem_size:
      Err("Maximum memory lower than minimum memory")

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_SHARED_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)

    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
      options.do_addremove_disks = False
    else:
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Err("Wrong disk sizes/growth combination")
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Err("Wrong disk count/disk template combination")

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Err("Give either the nodes option or the iallocator option, not both")

    if options.http_check and not options.name_check:
      Err("Can't enable HTTP checks without name checks")

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MINMEM: options.minmem_size,
      constants.BE_MAXMEM: options.maxmem_size,
      constants.BE_VCPUS: options.vcpu_count,
      }

    self.hypervisor = None
    self.hvp = {}
    if options.hypervisor:
      self.hypervisor, self.hvp = options.hypervisor

    if options.reboot_types is None:
      options.reboot_types = constants.REBOOT_TYPES
    else:
      options.reboot_types = options.reboot_types.split(",")
      rt_diff = set(options.reboot_types).difference(constants.REBOOT_TYPES)
      if rt_diff:
        Err("Invalid reboot types specified: %s" % utils.CommaJoin(rt_diff))

    socket.setdefaulttimeout(options.net_timeout)
516 """Read the cluster state from the master daemon."""
518 names = self.opts.nodes.split(",")
522 op = opcodes.OpNodeQuery(output_fields=["name", "offline", "drained"],
523 names=names, use_locking=True)
524 result = self.ExecOp(True, op)
525 except errors.GenericError, err:
526 err_code, msg = cli.FormatError(err)
527 Err(msg, exit_code=err_code)
528 self.nodes = [data[0] for data in result if not (data[1] or data[2])]

    op_diagnose = opcodes.OpOsDiagnose(output_fields=["name",
                                                      "variants",
                                                      "hidden"],
                                       names=[])
    result = self.ExecOp(True, op_diagnose)

    if not result:
      Err("Can't get the OS list")

    found = False
    for (name, variants, _) in result:
      if self.opts.os in cli.CalculateOSNames(name, variants):
        found = True
        break

    if not found:
      Err("OS '%s' not found" % self.opts.os)

    cluster_info = self.cl.QueryClusterInfo()
    self.cluster_info = cluster_info
    if not self.cluster_info:
      Err("Can't get cluster info")

    default_nic_params = self.cluster_info["nicparams"][constants.PP_DEFAULT]
    self.cluster_default_nicparams = default_nic_params
    if self.hypervisor is None:
      self.hypervisor = self.cluster_info["default_hypervisor"]
    self.hv_class = hypervisor.GetHypervisorClass(self.hypervisor)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnCreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)

    Log("Creating instances")
    for pnode, snode, instance in mytor:
      Log("instance %s", instance, indent=1)
      if self.opts.iallocator:
        pnode = snode = None
        msg = "with iallocator %s" % self.opts.iallocator
      elif self.opts.disk_template not in constants.DTS_INT_MIRROR:
        snode = None
        msg = "on %s" % pnode
      else:
        msg = "on %s, %s" % (pnode, snode)

      Log(msg, indent=2)

      op = opcodes.OpInstanceCreate(instance_name=instance,
                                    disks=[{"size": size}
                                           for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=self.opts.ip_check,
                                    name_check=self.opts.name_check,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    hypervisor=self.hypervisor,
                                    osparams=self.opts.osparams,
                                    )
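      # Queue the instance for removal only once its creation job succeeds:
      # the outer lambda binds the name now, the inner one runs later as the
      # ExecOrQueue post_process callback.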
      remove_instance = lambda name: lambda: self.to_rem.append(name)
      self.ExecOrQueue(instance, [op], post_process=remove_instance(instance))

  @_DoBatch(False)
  def BurnModifyRuntimeMemory(self):
    """Alter the runtime memory."""
    Log("Setting instance runtime memory")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      tgt_mem = self.bep[constants.BE_MINMEM]
      op = opcodes.OpInstanceSetParams(instance_name=instance,
                                       runtime_mem=tgt_mem)
      Log("Set memory to %s MB", tgt_mem, indent=2)
      self.ExecOrQueue(instance, [op])

  @_DoBatch(False)
  def BurnGrowDisks(self):
    """Grow both the os and the swap disks by the requested amount, if any."""
    Log("Growing disks")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      for idx, growth in enumerate(self.disk_growth):
        if growth > 0:
          op = opcodes.OpInstanceGrowDisk(instance_name=instance, disk=idx,
                                          amount=growth, wait_for_sync=True)
          Log("increase disk/%s by %s MB", idx, growth, indent=2)
          self.ExecOrQueue(instance, [op])

  @_DoBatch(False)
  def BurnReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    Log("Replacing disks on the same nodes")
    early_release = self.opts.early_release
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      ops = []
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpInstanceReplaceDisks(instance_name=instance,
                                            mode=mode,
                                            disks=list(range(self.disk_count)),
                                            early_release=early_release)
        Log("run %s", mode, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, ops)

  @_DoBatch(False)
  def BurnReplaceDisks2(self):
    """Replace secondary node."""
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG

    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s", instance, indent=1)
      if self.opts.iallocator:
        tnode = None
        msg = "with iallocator %s" % self.opts.iallocator
      else:
        msg = tnode
      op = opcodes.OpInstanceReplaceDisks(instance_name=instance,
                                          mode=mode,
                                          remote_node=tnode,
                                          iallocator=self.opts.iallocator,
                                          disks=[],
                                          early_release=self.opts.early_release)
      Log("run %s %s", mode, msg, indent=2)
      self.ExecOrQueue(instance, [op])

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnFailover(self):
    """Failover the instances."""
    Log("Failing over instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      op = opcodes.OpInstanceFailover(instance_name=instance,
                                      ignore_consistency=False)
      self.ExecOrQueue(instance, [op])
688 """Move the instances."""
689 Log("Moving instances")
690 mytor = izip(islice(cycle(self.nodes), 1, None),
692 for tnode, instance in mytor:
693 Log("instance %s", instance, indent=1)
694 op = opcodes.OpInstanceMove(instance_name=instance,
696 self.ExecOrQueue(instance, [op])

  @_DoBatch(False)
  def BurnMigrate(self):
    """Migrate the instances."""
    Log("Migrating instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      op1 = opcodes.OpInstanceMigrate(instance_name=instance, mode=None,
                                      cleanup=False)
      op2 = opcodes.OpInstanceMigrate(instance_name=instance, mode=None,
                                      cleanup=True)
      Log("migration and migration cleanup", indent=2)
      self.ExecOrQueue(instance, [op1, op2])
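      # The second opcode repeats the migration in cleanup mode, which is
      # expected to be a harmless no-op after a successful live migration.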

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    Log("Exporting and re-importing instances")
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      Log("instance %s", instance, indent=1)
      # read the full name of the instance
      nam_op = opcodes.OpInstanceQuery(output_fields=["name"],
                                       names=[instance], use_locking=True)
      full_name = self.ExecOp(False, nam_op)[0][0]
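      # The export directory on the source node is keyed by the instance's
      # full name, which may differ from the possibly-short name given on
      # the command line, hence the query above.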

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("import from %s"
                          " with iallocator %s" %
                          (enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_INT_MIRROR:
        snode = None
        import_log_msg = ("import from %s to %s" %
                          (enode, pnode))
      else:
        import_log_msg = ("import from %s to %s, %s" %
                          (enode, pnode, snode))

      exp_op = opcodes.OpBackupExport(instance_name=instance,
                                      target_node=enode,
                                      mode=constants.EXPORT_MODE_LOCAL,
                                      shutdown=True)
      rem_op = opcodes.OpInstanceRemove(instance_name=instance,
                                        ignore_failures=True)
      imp_dir = utils.PathJoin(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpInstanceCreate(instance_name=instance,
                                        disks=[{"size": size}
                                               for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=self.opts.ip_check,
                                        name_check=self.opts.name_check,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        osparams=self.opts.osparams,
                                        )

      erem_op = opcodes.OpBackupRemove(instance_name=instance)

      Log("export to node %s", enode, indent=2)
      Log("remove instance", indent=2)
      Log(import_log_msg, indent=2)
      Log("remove export", indent=2)
      self.ExecOrQueue(instance, [exp_op, rem_op, imp_op, erem_op])

  @staticmethod
  def StopInstanceOp(instance):
    """Stop given instance."""
    return opcodes.OpInstanceShutdown(instance_name=instance)

  @staticmethod
  def StartInstanceOp(instance):
    """Start given instance."""
    return opcodes.OpInstanceStartup(instance_name=instance, force=False)

  @staticmethod
  def RenameInstanceOp(instance, instance_new):
    """Rename instance."""
    return opcodes.OpInstanceRename(instance_name=instance,
                                    new_name=instance_new)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnStopStart(self):
    """Stop/start the instances."""
    Log("Stopping and starting instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, [op1, op2])

  @_DoBatch(False)
  def BurnRemove(self):
    """Remove the instances."""
    Log("Removing instances")
    for instance in self.to_rem:
      Log("instance %s", instance, indent=1)
      op = opcodes.OpInstanceRemove(instance_name=instance,
                                    ignore_failures=True)
      self.ExecOrQueue(instance, [op])

  def BurnRename(self):
    """Rename the instances.

    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
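    # Per-instance sequence: stop, rename to the spare name, start and
    # verify under the new name; then stop, rename back, start and verify
    # again, so the spare name is free for the next instance.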
825 Log("Renaming instances")
826 rename = self.opts.rename
827 for instance in self.instances:
828 Log("instance %s", instance, indent=1)
829 op_stop1 = self.StopInstanceOp(instance)
830 op_stop2 = self.StopInstanceOp(rename)
831 op_rename1 = self.RenameInstanceOp(instance, rename)
832 op_rename2 = self.RenameInstanceOp(rename, instance)
833 op_start1 = self.StartInstanceOp(rename)
834 op_start2 = self.StartInstanceOp(instance)
835 self.ExecOp(False, op_stop1, op_rename1, op_start1)
836 self._CheckInstanceAlive(rename)
837 self.ExecOp(False, op_stop2, op_rename2, op_start2)
838 self._CheckInstanceAlive(instance)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnReinstall(self):
    """Reinstall the instances."""
    Log("Reinstalling instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = opcodes.OpInstanceReinstall(instance_name=instance)
      Log("reinstall without passing the OS", indent=2)
      op3 = opcodes.OpInstanceReinstall(instance_name=instance,
                                        os_type=self.opts.os)
      Log("reinstall specifying the OS", indent=2)
      op4 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, [op1, op2, op3, op4])

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnReboot(self):
    """Reboot the instances."""
    Log("Rebooting instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      ops = []
      for reboot_type in self.opts.reboot_types:
        op = opcodes.OpInstanceReboot(instance_name=instance,
                                      reboot_type=reboot_type,
                                      ignore_secondaries=False)
        Log("reboot with type '%s'", reboot_type, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, ops)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnActivateDisks(self):
    """Activate and deactivate disks of the instances."""
    Log("Activating/deactivating disks")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      op_start = self.StartInstanceOp(instance)
      op_act = opcodes.OpInstanceActivateDisks(instance_name=instance)
      op_deact = opcodes.OpInstanceDeactivateDisks(instance_name=instance)
      op_stop = self.StopInstanceOp(instance)
      Log("activate disks when online", indent=2)
      Log("activate disks when offline", indent=2)
      Log("deactivate disks (when offline)", indent=2)
      self.ExecOrQueue(instance, [op_act, op_stop, op_act, op_deact, op_start])

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnAddRemoveDisks(self):
    """Add and remove an extra disk for the instances."""
    Log("Adding and removing disks")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      op_add = opcodes.OpInstanceSetParams(
        instance_name=instance,
        disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
      op_rem = opcodes.OpInstanceSetParams(
        instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
      op_stop = self.StopInstanceOp(instance)
      op_start = self.StartInstanceOp(instance)
      Log("adding a disk", indent=2)
      Log("removing last disk", indent=2)
      self.ExecOrQueue(instance, [op_add, op_stop, op_rem, op_start])

  @_DoBatch(False)
  def BurnAddRemoveNICs(self):
    """Add and remove an extra NIC for the instances."""
    Log("Adding and removing NICs")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      op_add = opcodes.OpInstanceSetParams(
        instance_name=instance, nics=[(constants.DDM_ADD, {})])
      op_rem = opcodes.OpInstanceSetParams(
        instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
      Log("adding a NIC", indent=2)
      Log("removing last NIC", indent=2)
      self.ExecOrQueue(instance, [op_add, op_rem])

  def ConfdCallback(self, reply):
    """Callback for confd queries"""
    if reply.type == confd_client.UPCALL_REPLY:
      if reply.server_reply.status != constants.CONFD_REPL_STATUS_OK:
        Err("Query %s gave non-ok status %s: %s" % (reply.orig_request,
                                                    reply.server_reply.status,
                                                    reply.server_reply))
      if reply.orig_request.type == constants.CONFD_REQ_PING:
        Log("Ping: OK", indent=1)
      elif reply.orig_request.type == constants.CONFD_REQ_CLUSTER_MASTER:
        if reply.server_reply.answer == self.cluster_info["master"]:
          Log("Master: OK", indent=1)
        else:
          Err("Master: wrong: %s" % reply.server_reply.answer)
      elif reply.orig_request.type == constants.CONFD_REQ_NODE_ROLE_BYNAME:
        if reply.server_reply.answer == constants.CONFD_NODE_ROLE_MASTER:
          Log("Node role for master: OK", indent=1)
        else:
          Err("Node role for master: wrong: %s" % reply.server_reply.answer)

  def DoConfdRequestReply(self, req):
    """Send a confd request and wait until all expected replies arrive."""
    self.confd_counting_callback.RegisterQuery(req.rsalt)
    self.confd_client.SendRequest(req, async=False)
    while not self.confd_counting_callback.AllAnswered():
      if not self.confd_client.ReceiveReply():
        Err("Did not receive all expected confd replies")
        break
949 """Run confd queries for our instances.
951 The following confd queries are tested:
952 - CONFD_REQ_PING: simple ping
953 - CONFD_REQ_CLUSTER_MASTER: cluster master
954 - CONFD_REQ_NODE_ROLE_BYNAME: node role, for the master
957 Log("Checking confd results")
959 filter_callback = confd_client.ConfdFilterCallback(self.ConfdCallback)
960 counting_callback = confd_client.ConfdCountingCallback(filter_callback)
961 self.confd_counting_callback = counting_callback
963 self.confd_client = confd_client.GetConfdClient(counting_callback)
965 req = confd_client.ConfdClientRequest(type=constants.CONFD_REQ_PING)
966 self.DoConfdRequestReply(req)
968 req = confd_client.ConfdClientRequest(
969 type=constants.CONFD_REQ_CLUSTER_MASTER)
970 self.DoConfdRequestReply(req)
972 req = confd_client.ConfdClientRequest(
973 type=constants.CONFD_REQ_NODE_ROLE_BYNAME,
974 query=self.cluster_info["master"])
975 self.DoConfdRequestReply(req)

  def _CheckInstanceAlive(self, instance):
    """Check if an instance is alive by doing http checks.

    This will try to retrieve the url /hostname.txt on the instance and
    check that it contains the hostname of the instance. In case we get
    ECONNREFUSED, we retry up to the net timeout seconds; for any other
    error we abort.

    """
    if not self.opts.http_check:
      return
    end_time = time.time() + self.opts.net_timeout
    url = None
    while time.time() < end_time and url is None:
      try:
        url = self.url_opener.open("http://%s/hostname.txt" % instance)
      except IOError:
        # here we can have connection refused, no route to host, etc.
        time.sleep(1)
    if url is None:
      raise InstanceDown(instance, "Cannot contact instance")
    hostname = url.read().strip()
    url.close()
    if hostname != instance:
      raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
                                    (instance, hostname)))

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """
    opts = self.opts

    Log("Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE,
                                   constants.DT_SHARED_FILE)):
      Err("When one node is available/selected the disk template must"
          " be 'diskless', 'file', 'sharedfile' or 'plain'")

    if opts.do_confd_tests and not constants.ENABLE_CONFD:
      Err("You selected confd tests but confd was disabled at configure time")

    has_err = True
    try:
      self.BurnCreateInstances()

      if self.bep[constants.BE_MINMEM] < self.bep[constants.BE_MAXMEM]:
        self.BurnModifyRuntimeMemory()

      if opts.do_replace1 and opts.disk_template in constants.DTS_INT_MIRROR:
        self.BurnReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_INT_MIRROR):
        self.BurnReplaceDisks2()

      if (opts.disk_template in constants.DTS_GROWABLE and
          compat.any(n > 0 for n in self.disk_growth)):
        self.BurnGrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_MIRRORED:
        self.BurnFailover()

      if opts.do_migrate:
        if opts.disk_template not in constants.DTS_MIRRORED:
          Log("Skipping migration (disk template %s does not support it)",
              opts.disk_template)
        elif not self.hv_class.CAN_MIGRATE:
          Log("Skipping migration (hypervisor %s does not support it)",
              self.hypervisor)
        else:
          self.BurnMigrate()

      if (opts.do_move and len(self.nodes) > 1 and
          opts.disk_template in [constants.DT_PLAIN, constants.DT_FILE]):
        self.BurnMove()

      if (opts.do_importexport and
          opts.disk_template not in (constants.DT_DISKLESS,
                                     constants.DT_SHARED_FILE,
                                     constants.DT_FILE)):
        self.BurnImportExport()

      if opts.do_reinstall:
        self.BurnReinstall()

      if opts.do_reboot:
        self.BurnReboot()

      if opts.do_addremove_disks:
        self.BurnAddRemoveDisks()

      default_nic_mode = self.cluster_default_nicparams[constants.NIC_MODE]
      # Don't add/remove nics in routed mode, as we would need an IP to add
      # them with
      if opts.do_addremove_nics:
        if default_nic_mode == constants.NIC_MODE_BRIDGED:
          self.BurnAddRemoveNICs()
        else:
          Log("Skipping nic add/remove as the cluster is not in bridged mode")

      if opts.do_activate_disks:
        self.BurnActivateDisks()

      if opts.rename:
        self.BurnRename()

      if opts.do_confd_tests:
        self.BurnConfd()

      if opts.do_startstop:
        self.BurnStopStart()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      if not self.opts.keep_instances:
        try:
          self.BurnRemove()
        except Exception, err:  # pylint: disable=W0703
          if has_err:  # already detected errors, so errors in removal
                       # are quite expected
            Log("Note: error detected during instance remove: %s", err)
          else:  # non-expected error
            raise

    return constants.EXIT_SUCCESS


def Main():
  """Main function.

  """
  utils.SetupLogging(constants.LOG_BURNIN, sys.argv[0],
                     debug=False, stderr_logging=True)

  return Burner().BurninCluster()


if __name__ == "__main__":
  sys.exit(Main())