4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 from itertools import izip, islice, cycle
32 from cStringIO import StringIO
34 from ganeti import opcodes
35 from ganeti import constants
36 from ganeti import cli
37 from ganeti import errors
38 from ganeti import utils
39 from ganeti import hypervisor
40 from ganeti import compat
41 from ganeti import pathutils
43 from ganeti.confd import client as confd_client
# One-line usage synopsis, printed by Usage() and fed to the option parser.
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
class InstanceDown(Exception):
  """Raised when an instance that should be running is found down."""
class BurninFailure(Exception):
  """Raised when an error is detected during the burnin run."""
65 """Shows program usage information and exits the program."""
67 print >> sys.stderr, "Usage:"
68 print >> sys.stderr, USAGE
def Log(msg, *args, **kwargs):
  """Simple function that prints out its argument.
  # NOTE(review): lines are elided here in this view -- presumably the
  # docstring is closed and msg is %-formatted with *args before printing;
  # confirm against the full file.
  # "indent" selects the nesting level: 2 spaces per level plus a header
  # character looked up in LOG_HEADERS (defined outside this view).
  indent = kwargs.get("indent", 0)
  sys.stdout.write("%*s%s%s\n" % (2 * indent, "",
                                  LOG_HEADERS.get(indent, " "), msg))
def Err(msg, exit_code=1):
  """Simple error logging that prints to stderr.
  # NOTE(review): elided lines follow; given the exit_code parameter this
  # presumably terminates the process via sys.exit(exit_code) -- confirm.
  sys.stderr.write(msg + "\n")
class SimpleOpener(urllib.FancyURLopener):
  """A simple url opener"""
  # pylint: disable=W0221
  # Used by _CheckInstanceAlive to fetch /hostname.txt from instances.
  def prompt_user_passwd(self, host, realm, clear_cache=0):
    """No-interaction version of prompt_user_passwd."""
    # we follow parent class' API
    # pylint: disable=W0613
    # NOTE(review): the return (presumably None, None so no credentials are
    # ever supplied) is elided in this view -- confirm against full file.
  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Custom error handling"""
    # make sure sockets are not left in CLOSE_WAIT, this is similar
    # but with a different exception to the BasicURLOpener class
    _ = fp.read() # throw away data
    # any HTTP error while probing an instance means it is treated as down
    raise InstanceDown("HTTP error returned: code %s, msg %s" %
114 cli.cli_option("-o", "--os", dest="os", default=None,
115 help="OS to use during burnin",
117 completion_suggest=cli.OPT_COMPL_ONE_OS),
120 cli.cli_option("--disk-size", dest="disk_size",
121 help="Disk size (determines disk count)",
122 default="128m", type="string", metavar="<size,size,...>",
123 completion_suggest=("128M 512M 1G 4G 1G,256M"
124 " 4G,1G,1G 10G").split()),
125 cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth",
126 default="128m", type="string", metavar="<size,size,...>"),
127 cli.cli_option("--mem-size", dest="mem_size", help="Memory size",
128 default=None, type="unit", metavar="<size>",
129 completion_suggest=("128M 256M 512M 1G 4G 8G"
130 " 12G 16G").split()),
131 cli.cli_option("--maxmem-size", dest="maxmem_size", help="Max Memory size",
132 default=256, type="unit", metavar="<size>",
133 completion_suggest=("128M 256M 512M 1G 4G 8G"
134 " 12G 16G").split()),
135 cli.cli_option("--minmem-size", dest="minmem_size", help="Min Memory size",
136 default=128, type="unit", metavar="<size>",
137 completion_suggest=("128M 256M 512M 1G 4G 8G"
138 " 12G 16G").split()),
139 cli.cli_option("--vcpu-count", dest="vcpu_count", help="VCPU count",
140 default=3, type="unit", metavar="<count>",
141 completion_suggest=("1 2 3 4").split()),
146 cli.EARLY_RELEASE_OPT,
147 cli.cli_option("--no-replace1", dest="do_replace1",
148 help="Skip disk replacement with the same secondary",
149 action="store_false", default=True),
150 cli.cli_option("--no-replace2", dest="do_replace2",
151 help="Skip disk replacement with a different secondary",
152 action="store_false", default=True),
153 cli.cli_option("--no-failover", dest="do_failover",
154 help="Skip instance failovers", action="store_false",
156 cli.cli_option("--no-migrate", dest="do_migrate",
157 help="Skip instance live migration",
158 action="store_false", default=True),
159 cli.cli_option("--no-move", dest="do_move",
160 help="Skip instance moves", action="store_false",
162 cli.cli_option("--no-importexport", dest="do_importexport",
163 help="Skip instance export/import", action="store_false",
165 cli.cli_option("--no-startstop", dest="do_startstop",
166 help="Skip instance stop/start", action="store_false",
168 cli.cli_option("--no-reinstall", dest="do_reinstall",
169 help="Skip instance reinstall", action="store_false",
171 cli.cli_option("--no-reboot", dest="do_reboot",
172 help="Skip instance reboot", action="store_false",
174 cli.cli_option("--reboot-types", dest="reboot_types",
175 help="Specify the reboot types", default=None),
176 cli.cli_option("--no-activate-disks", dest="do_activate_disks",
177 help="Skip disk activation/deactivation",
178 action="store_false", default=True),
179 cli.cli_option("--no-add-disks", dest="do_addremove_disks",
180 help="Skip disk addition/removal",
181 action="store_false", default=True),
182 cli.cli_option("--no-add-nics", dest="do_addremove_nics",
183 help="Skip NIC addition/removal",
184 action="store_false", default=True),
185 cli.cli_option("--no-nics", dest="nics",
186 help="No network interfaces", action="store_const",
187 const=[], default=[{}]),
188 cli.cli_option("--no-confd", dest="do_confd_tests",
189 help="Skip confd queries",
190 action="store_false", default=constants.ENABLE_CONFD),
191 cli.cli_option("--rename", dest="rename", default=None,
192 help=("Give one unused instance name which is taken"
193 " to start the renaming sequence"),
194 metavar="<instance_name>"),
195 cli.cli_option("-t", "--disk-template", dest="disk_template",
196 choices=list(constants.DISK_TEMPLATES),
197 default=constants.DT_DRBD8,
198 help="Disk template (diskless, file, plain, sharedfile"
200 cli.cli_option("-n", "--nodes", dest="nodes", default="",
201 help=("Comma separated list of nodes to perform"
202 " the burnin on (defaults to all nodes)"),
203 completion_suggest=cli.OPT_COMPL_MANY_NODES),
204 cli.cli_option("-I", "--iallocator", dest="iallocator",
205 default=None, type="string",
206 help=("Perform the allocation using an iallocator"
207 " instead of fixed node spread (node restrictions no"
208 " longer apply, therefore -n/--nodes must not be"
210 completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR),
211 cli.cli_option("-p", "--parallel", default=False, action="store_true",
213 help=("Enable parallelization of some operations in"
214 " order to speed burnin or to test granular locking")),
215 cli.cli_option("--net-timeout", default=15, type="int",
217 help=("The instance check network timeout in seconds"
218 " (defaults to 15 seconds)"),
219 completion_suggest="15 60 300 900".split()),
220 cli.cli_option("-C", "--http-check", default=False, action="store_true",
222 help=("Enable checking of instance status via http,"
223 " looking for /hostname.txt that should contain the"
224 " name of the instance")),
225 cli.cli_option("-K", "--keep-instances", default=False,
227 dest="keep_instances",
228 help=("Leave instances on the cluster after burnin,"
229 " for investigation in case of errors or simply"
# Mainly used for bash completion
# Positional arguments: at least one instance name is required.
ARGUMENTS = [cli.ArgInstance(min=1)]
def _DoCheckInstances(fn):
  """Decorator for checking instances.
  # Wraps a Burner method so that, after it completes, every instance is
  # probed for liveness via _CheckInstanceAlive.
  def wrapper(self, *args, **kwargs):
    val = fn(self, *args, **kwargs)
    for instance in self.instances:
      self._CheckInstanceAlive(instance) # pylint: disable=W0212
    # NOTE(review): "return val" and "return wrapper" are elided in this
    # view -- confirm against full file.
251 """Decorator for possible batch operations.
253 Must come after the _DoCheckInstances decorator (if any).
255 @param retry: whether this is a retryable batch, will be
260 def batched(self, *args, **kwargs):
261 self.StartBatch(retry)
262 val = fn(self, *args, **kwargs)
270 class Burner(object):
275 self.url_opener = SimpleOpener()
276 self._feed_buf = StringIO()
282 self.queue_retry = False
283 self.disk_count = self.disk_growth = self.disk_size = None
284 self.hvp = self.bep = None
286 self.cl = cli.GetClient()
289 def ClearFeedbackBuf(self):
290 """Clear the feedback buffer."""
291 self._feed_buf.truncate(0)
293 def GetFeedbackBuf(self):
294 """Return the contents of the buffer."""
295 return self._feed_buf.getvalue()
297 def Feedback(self, msg):
298 """Acumulate feedback in our buffer."""
299 formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
300 self._feed_buf.write(formatted_msg + "\n")
301 if self.opts.verbose:
302 Log(formatted_msg, indent=3)
  def MaybeRetry(self, retry_count, msg, fn, *args):
    """Possibly retry a given function execution.

    @type retry_count: int
    @param retry_count: retry counter:
        - 0: non-retryable action
        - 1: last retry for a retryable action
        - MAX_RETRIES: original try for a retryable action
    @param msg: the kind of the operation
    @param fn: the function to be called
    # NOTE(review): several lines are elided in this view; the missing code
    # presumably wraps fn(*args) in a try block -- confirm in the full file.
    if retry_count > 0 and retry_count < MAX_RETRIES:
      Log("Idempotent %s succeeded after %d retries",
          msg, MAX_RETRIES - retry_count)
    except Exception, err: # pylint: disable=W0703
      # retry_count == 0: caller declared the action non-retryable
      Log("Non-idempotent %s failed, aborting", msg)
    elif retry_count == 1:
      # last allowed attempt has also failed
      Log("Idempotent %s repeated failure, aborting", msg)
      Log("Idempotent %s failed, retry #%d/%d: %s",
          msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err)
      # recurse with one fewer retry remaining
      self.MaybeRetry(retry_count - 1, msg, fn, *args)
  def _ExecOp(self, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    # submit all opcodes as one job and poll it to completion, routing job
    # feedback messages through self.Feedback
    job_id = cli.SendJob(ops, cl=self.cl)
    results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
    # NOTE(review): the single-result unwrap/return is elided in this view
  def ExecOp(self, retry, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    # NOTE(review): the computation of "rval" (the retry budget derived
    # from the "retry" flag, presumably MAX_RETRIES or 0) is elided in
    # this view -- confirm against the full file.
    cli.SetGenericOpcodeOpts(ops, self.opts)
    return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops)
  def ExecOrQueue(self, name, ops, post_process=None):
    """Execute an opcode and manage the exec buffer."""
    if self.opts.parallel:
      # parallel mode: only record the job; CommitQueue() runs it later
      cli.SetGenericOpcodeOpts(ops, self.opts)
      self.queued_ops.append((ops, name, post_process))
      # NOTE(review): the "else:" line for the serial path is elided in
      # this view -- the following lines belong to the serial branch
      val = self.ExecOp(self.queue_retry, *ops) # pylint: disable=W0142
      if post_process is not None:
      # NOTE(review): the post_process() call and return are elided here
  def StartBatch(self, retry):
    """Start a new batch of jobs.

    @param retry: whether this is a retryable batch

    """
    # NOTE(review): the reset of self.queued_ops is elided in this view
    self.queue_retry = retry
  def CommitQueue(self):
    """Execute all submitted opcodes in case of parallel burnin"""
    # nothing to do in serial mode or when no jobs were queued
    if not self.opts.parallel or not self.queued_ops:
      # NOTE(review): the early return, "rval" setup and the try/finally
      # around the jobset execution are elided in this view -- confirm.
      results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    """
    self.ClearFeedbackBuf()
    jex = cli.JobExecutor(cl=self.cl, feedback_fn=self.Feedback)
    for ops, name, _ in jobs:
      jex.QueueJob(name, *ops) # pylint: disable=W0142
    # NOTE(review): the "try:" opening this handler is elided in this view
    results = jex.GetResults()
    except Exception, err: # pylint: disable=W0703
      # any failure while collecting results aborts the whole burnin
      Log("Jobs failed: %s", err)
      raise BurninFailure()
    # pair each queued job with its (success, result) tuple
    for (_, name, post_process), (success, result) in zip(jobs, results):
      except Exception, err: # pylint: disable=W0703
        Log("Post process call for job %s failed: %s", name, err)
    # NOTE(review): failure accounting before this raise is elided
    raise BurninFailure()
  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    # NOTE(review): parser construction is partially elided in this view
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version=("%%prog (ganeti) %s" %
                                            constants.RELEASE_VERSION),
    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      # when a single --mem-size is given it overrides both memory bounds
      options.maxmem_size = options.mem_size
      options.minmem_size = options.mem_size
    elif options.minmem_size > options.maxmem_size:
      Err("Maximum memory lower than minimum memory")
    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_SHARED_FILE,
    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)
    if options.disk_template == constants.DT_DISKLESS:
      # diskless instances cannot grow or add/remove disks
      disk_size = disk_growth = []
      options.do_addremove_disks = False
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Err("Wrong disk sizes/growth combination")
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Err("Wrong disk count/disk template combination")
    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)
    if options.nodes and options.iallocator:
      Err("Give either the nodes option or the iallocator option, not both")
    if options.http_check and not options.name_check:
      Err("Can't enable HTTP checks without name checks")
    self.instances = args
    # NOTE(review): the backend-parameter dict assignment wrapping the
    # following entries (presumably self.bep = {...}) is elided in this view
    constants.BE_MINMEM: options.minmem_size,
    constants.BE_MAXMEM: options.maxmem_size,
    constants.BE_VCPUS: options.vcpu_count,
    self.hypervisor = None
    if options.hypervisor:
      self.hypervisor, self.hvp = options.hypervisor
    if options.reboot_types is None:
      options.reboot_types = constants.REBOOT_TYPES
      options.reboot_types = options.reboot_types.split(",")
    # reject reboot types not known to the cluster
    rt_diff = set(options.reboot_types).difference(constants.REBOOT_TYPES)
      Err("Invalid reboot types specified: %s" % utils.CommaJoin(rt_diff))
    # global socket timeout governs the instance HTTP liveness checks
    socket.setdefaulttimeout(options.net_timeout)
517 """Read the cluster state from the master daemon."""
519 names = self.opts.nodes.split(",")
523 op = opcodes.OpNodeQuery(output_fields=["name", "offline", "drained"],
524 names=names, use_locking=True)
525 result = self.ExecOp(True, op)
526 except errors.GenericError, err:
527 err_code, msg = cli.FormatError(err)
528 Err(msg, exit_code=err_code)
529 self.nodes = [data[0] for data in result if not (data[1] or data[2])]
531 op_diagnose = opcodes.OpOsDiagnose(output_fields=["name",
535 result = self.ExecOp(True, op_diagnose)
538 Err("Can't get the OS list")
541 for (name, variants, _) in result:
542 if self.opts.os in cli.CalculateOSNames(name, variants):
547 Err("OS '%s' not found" % self.opts.os)
549 cluster_info = self.cl.QueryClusterInfo()
550 self.cluster_info = cluster_info
551 if not self.cluster_info:
552 Err("Can't get cluster info")
554 default_nic_params = self.cluster_info["nicparams"][constants.PP_DEFAULT]
555 self.cluster_default_nicparams = default_nic_params
556 if self.hypervisor is None:
557 self.hypervisor = self.cluster_info["default_hypervisor"]
558 self.hv_class = hypervisor.GetHypervisorClass(self.hypervisor)
  def BurnCreateInstances(self):
    """Create the given instances.

    """
    # round-robin over the node list so each instance gets consecutive
    # nodes as primary/secondary (izip truncates to the shortest iterator)
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
    Log("Creating instances")
    for pnode, snode, instance in mytor:
      Log("instance %s", instance, indent=1)
      if self.opts.iallocator:
        msg = "with iallocator %s" % self.opts.iallocator
      elif self.opts.disk_template not in constants.DTS_INT_MIRROR:
        msg = "on %s" % pnode
        # NOTE(review): the "else:" for the mirrored case is elided here
        msg = "on %s, %s" % (pnode, snode)
      # NOTE(review): several OpInstanceCreate keyword arguments (pnode,
      # snode, nics, beparams, hvparams, ...) are elided in this view
      op = opcodes.OpInstanceCreate(instance_name=instance,
                                    disks=[{"size": size}
                                           for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    ip_check=self.opts.ip_check,
                                    name_check=self.opts.name_check,
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    hypervisor=self.hypervisor,
                                    osparams=self.opts.osparams,
      # the double lambda binds "name" per iteration, avoiding the
      # late-binding closure pitfall; the instance is only scheduled for
      # removal after its creation job succeeds
      remove_instance = lambda name: lambda: self.to_rem.append(name)
      self.ExecOrQueue(instance, [op], post_process=remove_instance(instance))
  def BurnModifyRuntimeMemory(self):
    """Alter the runtime memory."""
    Log("Setting instance runtime memory")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      # shrink each instance to its configured minimum memory
      tgt_mem = self.bep[constants.BE_MINMEM]
      # NOTE(review): the runtime_mem=tgt_mem keyword line is elided in
      # this view -- confirm against the full file
      op = opcodes.OpInstanceSetParams(instance_name=instance,
      Log("Set memory to %s MB", tgt_mem, indent=2)
      self.ExecOrQueue(instance, [op])
  def BurnGrowDisks(self):
    """Grow both the os and the swap disks by the requested amount, if any."""
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      for idx, growth in enumerate(self.disk_growth):
        # NOTE(review): a guard line (presumably "if growth > 0:") is
        # elided here in this view -- confirm against the full file
        op = opcodes.OpInstanceGrowDisk(instance_name=instance, disk=idx,
                                        amount=growth, wait_for_sync=True)
        Log("increase disk/%s by %s MB", idx, growth, indent=2)
        self.ExecOrQueue(instance, [op])
  def BurnReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    Log("Replacing disks on the same nodes")
    early_release = self.opts.early_release
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      # NOTE(review): the "ops = []" accumulator and its append are elided
      # in this view; both replace modes are queued as one batch below
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpInstanceReplaceDisks(instance_name=instance,
                                            disks=list(range(self.disk_count)),
                                            early_release=early_release)
        Log("run %s", mode, indent=2)
      self.ExecOrQueue(instance, ops)
  def BurnReplaceDisks2(self):
    """Replace secondary node."""
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG
    # pair each instance with a candidate new secondary, two nodes ahead
    # in the round-robin order
    mytor = izip(islice(cycle(self.nodes), 2, None),
    for tnode, instance in mytor:
      Log("instance %s", instance, indent=1)
      if self.opts.iallocator:
        # NOTE(review): clearing of tnode for the iallocator case appears
        # elided in this view -- confirm against the full file
        msg = "with iallocator %s" % self.opts.iallocator
      op = opcodes.OpInstanceReplaceDisks(instance_name=instance,
                                          iallocator=self.opts.iallocator,
                                          early_release=self.opts.early_release)
      Log("run %s %s", mode, msg, indent=2)
      self.ExecOrQueue(instance, [op])
677 def BurnFailover(self):
678 """Failover the instances."""
679 Log("Failing over instances")
680 for instance in self.instances:
681 Log("instance %s", instance, indent=1)
682 op = opcodes.OpInstanceFailover(instance_name=instance,
683 ignore_consistency=False)
684 self.ExecOrQueue(instance, [op])
689 """Move the instances."""
690 Log("Moving instances")
691 mytor = izip(islice(cycle(self.nodes), 1, None),
693 for tnode, instance in mytor:
694 Log("instance %s", instance, indent=1)
695 op = opcodes.OpInstanceMove(instance_name=instance,
697 self.ExecOrQueue(instance, [op])
  def BurnMigrate(self):
    """Migrate the instances."""
    Log("Migrating instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      # NOTE(review): the remaining keyword arguments of both migrate
      # opcodes (presumably cleanup=False then cleanup=True) are elided in
      # this view -- confirm against the full file
      op1 = opcodes.OpInstanceMigrate(instance_name=instance, mode=None,
      op2 = opcodes.OpInstanceMigrate(instance_name=instance, mode=None,
      Log("migration and migration cleanup", indent=2)
      self.ExecOrQueue(instance, [op1, op2])
  def BurnImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    Log("Exporting and re-importing instances")
    # round-robin: primary, secondary and export node for each instance
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
    for pnode, snode, enode, instance in mytor:
      Log("instance %s", instance, indent=1)
      # read the full name of the instance
      nam_op = opcodes.OpInstanceQuery(output_fields=["name"],
                                       names=[instance], use_locking=True)
      full_name = self.ExecOp(False, nam_op)[0][0]
      if self.opts.iallocator:
        import_log_msg = ("import from %s"
                          " with iallocator %s" %
                          (enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_INT_MIRROR:
        import_log_msg = ("import from %s to %s" %
        # NOTE(review): the "else:" branch opener is elided in this view
        import_log_msg = ("import from %s to %s, %s" %
                          (enode, pnode, snode))
      # NOTE(review): several keyword arguments of the export and import
      # opcodes (target node, pnode/snode, src_node/src_path, nics,
      # beparams/hvparams, ...) are elided in this view
      exp_op = opcodes.OpBackupExport(instance_name=instance,
                                      mode=constants.EXPORT_MODE_LOCAL,
      rem_op = opcodes.OpInstanceRemove(instance_name=instance,
                                        ignore_failures=True)
      imp_dir = utils.PathJoin(pathutils.EXPORT_DIR, full_name)
      imp_op = opcodes.OpInstanceCreate(instance_name=instance,
                                        disks=[{"size": size}
                                               for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        mode=constants.INSTANCE_IMPORT,
                                        ip_check=self.opts.ip_check,
                                        name_check=self.opts.name_check,
                                        file_storage_dir=None,
                                        iallocator=self.opts.iallocator,
                                        osparams=self.opts.osparams,
      erem_op = opcodes.OpBackupRemove(instance_name=instance)
      Log("export to node %s", enode, indent=2)
      Log("remove instance", indent=2)
      Log(import_log_msg, indent=2)
      Log("remove export", indent=2)
      # the four steps run as one batch: export, remove, re-import, cleanup
      self.ExecOrQueue(instance, [exp_op, rem_op, imp_op, erem_op])
783 def StopInstanceOp(instance):
784 """Stop given instance."""
785 return opcodes.OpInstanceShutdown(instance_name=instance)
788 def StartInstanceOp(instance):
789 """Start given instance."""
790 return opcodes.OpInstanceStartup(instance_name=instance, force=False)
793 def RenameInstanceOp(instance, instance_new):
794 """Rename instance."""
795 return opcodes.OpInstanceRename(instance_name=instance,
796 new_name=instance_new)
800 def BurnStopStart(self):
801 """Stop/start the instances."""
802 Log("Stopping and starting instances")
803 for instance in self.instances:
804 Log("instance %s", instance, indent=1)
805 op1 = self.StopInstanceOp(instance)
806 op2 = self.StartInstanceOp(instance)
807 self.ExecOrQueue(instance, [op1, op2])
810 def BurnRemove(self):
811 """Remove the instances."""
812 Log("Removing instances")
813 for instance in self.to_rem:
814 Log("instance %s", instance, indent=1)
815 op = opcodes.OpInstanceRemove(instance_name=instance,
816 ignore_failures=True)
817 self.ExecOrQueue(instance, [op])
  def BurnRename(self):
    """Rename the instances.

    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
    Log("Renaming instances")
    # self.opts.rename holds the single spare name used as rename target
    rename = self.opts.rename
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      op_stop1 = self.StopInstanceOp(instance)
      op_stop2 = self.StopInstanceOp(rename)
      op_rename1 = self.RenameInstanceOp(instance, rename)
      op_rename2 = self.RenameInstanceOp(rename, instance)
      op_start1 = self.StartInstanceOp(rename)
      op_start2 = self.StartInstanceOp(instance)
      # rename to the spare name, verify, then rename back and verify again
      self.ExecOp(False, op_stop1, op_rename1, op_start1)
      self._CheckInstanceAlive(rename)
      self.ExecOp(False, op_stop2, op_rename2, op_start2)
      self._CheckInstanceAlive(instance)
843 def BurnReinstall(self):
844 """Reinstall the instances."""
845 Log("Reinstalling instances")
846 for instance in self.instances:
847 Log("instance %s", instance, indent=1)
848 op1 = self.StopInstanceOp(instance)
849 op2 = opcodes.OpInstanceReinstall(instance_name=instance)
850 Log("reinstall without passing the OS", indent=2)
851 op3 = opcodes.OpInstanceReinstall(instance_name=instance,
852 os_type=self.opts.os)
853 Log("reinstall specifying the OS", indent=2)
854 op4 = self.StartInstanceOp(instance)
855 self.ExecOrQueue(instance, [op1, op2, op3, op4])
  def BurnReboot(self):
    """Reboot the instances."""
    Log("Rebooting instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      # NOTE(review): the "ops" accumulator initialization and its append
      # are elided in this view; all reboot types run as one batch below
      for reboot_type in self.opts.reboot_types:
        op = opcodes.OpInstanceReboot(instance_name=instance,
                                      reboot_type=reboot_type,
                                      ignore_secondaries=False)
        Log("reboot with type '%s'", reboot_type, indent=2)
      self.ExecOrQueue(instance, ops)
875 def BurnActivateDisks(self):
876 """Activate and deactivate disks of the instances."""
877 Log("Activating/deactivating disks")
878 for instance in self.instances:
879 Log("instance %s", instance, indent=1)
880 op_start = self.StartInstanceOp(instance)
881 op_act = opcodes.OpInstanceActivateDisks(instance_name=instance)
882 op_deact = opcodes.OpInstanceDeactivateDisks(instance_name=instance)
883 op_stop = self.StopInstanceOp(instance)
884 Log("activate disks when online", indent=2)
885 Log("activate disks when offline", indent=2)
886 Log("deactivate disks (when offline)", indent=2)
887 self.ExecOrQueue(instance, [op_act, op_stop, op_act, op_deact, op_start])
891 def BurnAddRemoveDisks(self):
892 """Add and remove an extra disk for the instances."""
893 Log("Adding and removing disks")
894 for instance in self.instances:
895 Log("instance %s", instance, indent=1)
896 op_add = opcodes.OpInstanceSetParams(
897 instance_name=instance,
898 disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
899 op_rem = opcodes.OpInstanceSetParams(
900 instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
901 op_stop = self.StopInstanceOp(instance)
902 op_start = self.StartInstanceOp(instance)
903 Log("adding a disk", indent=2)
904 Log("removing last disk", indent=2)
905 self.ExecOrQueue(instance, [op_add, op_stop, op_rem, op_start])
908 def BurnAddRemoveNICs(self):
909 """Add, change and remove an extra NIC for the instances."""
910 Log("Adding and removing NICs")
911 for instance in self.instances:
912 Log("instance %s", instance, indent=1)
913 op_add = opcodes.OpInstanceSetParams(
914 instance_name=instance, nics=[(constants.DDM_ADD, {})])
915 op_chg = opcodes.OpInstanceSetParams(
916 instance_name=instance, nics=[(constants.DDM_MODIFY,
917 -1, {"mac": constants.VALUE_GENERATE})])
918 op_rem = opcodes.OpInstanceSetParams(
919 instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
920 Log("adding a NIC", indent=2)
921 Log("changing a NIC", indent=2)
922 Log("removing last NIC", indent=2)
923 self.ExecOrQueue(instance, [op_add, op_chg, op_rem])
  def ConfdCallback(self, reply):
    """Callback for confd queries"""
    # only genuine upcall replies are inspected
    if reply.type == confd_client.UPCALL_REPLY:
      if reply.server_reply.status != constants.CONFD_REPL_STATUS_OK:
        Err("Query %s gave non-ok status %s: %s" % (reply.orig_request,
                                                    reply.server_reply.status,
      if reply.orig_request.type == constants.CONFD_REQ_PING:
        Log("Ping: OK", indent=1)
      elif reply.orig_request.type == constants.CONFD_REQ_CLUSTER_MASTER:
        # the answer must match the master recorded in the cluster info
        if reply.server_reply.answer == self.cluster_info["master"]:
          Log("Master: OK", indent=1)
          # NOTE(review): the "else:" opener for this branch is elided
          Err("Master: wrong: %s" % reply.server_reply.answer)
      elif reply.orig_request.type == constants.CONFD_REQ_NODE_ROLE_BYNAME:
        if reply.server_reply.answer == constants.CONFD_NODE_ROLE_MASTER:
          Log("Node role for master: OK", indent=1)
          # NOTE(review): the "else:" opener for this branch is elided
          Err("Node role for master: wrong: %s" % reply.server_reply.answer)
  def DoConfdRequestReply(self, req):
    """Send one confd request and wait until all expected replies arrive."""
    self.confd_counting_callback.RegisterQuery(req.rsalt)
    self.confd_client.SendRequest(req, async=False)
    # spin on the counting callback; ReceiveReply() returning a false value
    # means the client gave up waiting
    while not self.confd_counting_callback.AllAnswered():
      if not self.confd_client.ReceiveReply():
        Err("Did not receive all expected confd replies")
        # NOTE(review): a loop-exit statement (presumably "break") is
        # elided after this line in this view -- confirm in the full file
954 """Run confd queries for our instances.
956 The following confd queries are tested:
957 - CONFD_REQ_PING: simple ping
958 - CONFD_REQ_CLUSTER_MASTER: cluster master
959 - CONFD_REQ_NODE_ROLE_BYNAME: node role, for the master
962 Log("Checking confd results")
964 filter_callback = confd_client.ConfdFilterCallback(self.ConfdCallback)
965 counting_callback = confd_client.ConfdCountingCallback(filter_callback)
966 self.confd_counting_callback = counting_callback
968 self.confd_client = confd_client.GetConfdClient(counting_callback)
970 req = confd_client.ConfdClientRequest(type=constants.CONFD_REQ_PING)
971 self.DoConfdRequestReply(req)
973 req = confd_client.ConfdClientRequest(
974 type=constants.CONFD_REQ_CLUSTER_MASTER)
975 self.DoConfdRequestReply(req)
977 req = confd_client.ConfdClientRequest(
978 type=constants.CONFD_REQ_NODE_ROLE_BYNAME,
979 query=self.cluster_info["master"])
980 self.DoConfdRequestReply(req)
  def _CheckInstanceAlive(self, instance):
    """Check if an instance is alive by doing http checks.

    This will try to retrieve the url on the instance /hostname.txt
    and check that it contains the hostname of the instance. In case
    we get ECONNREFUSED, we retry up to the net timeout seconds, for
    any other error we abort.

    """
    # nothing to verify when HTTP checking is disabled
    if not self.opts.http_check:
      # NOTE(review): the early return is elided in this view
    end_time = time.time() + self.opts.net_timeout
    # NOTE(review): "url = None" initialization and the try/except around
    # the open() call (retrying on ECONNREFUSED) are elided in this view
    while time.time() < end_time and url is None:
      url = self.url_opener.open("http://%s/hostname.txt" % instance)
      # here we can have connection refused, no route to host, etc.
      raise InstanceDown(instance, "Cannot contact instance")
    # the instance must serve its own hostname back to us
    hostname = url.read().strip()
    if hostname != instance:
      raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
                                    (instance, hostname)))
  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.
    # NOTE(review): docstring close, option parsing / state reading calls
    # and the "opts = self.opts" alias are elided in this view
    Log("Testing global parameters")
    # single-node clusters cannot host mirrored disk templates
    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_SHARED_FILE)):
      Err("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")
    if opts.do_confd_tests and not constants.ENABLE_CONFD:
      Err("You selected confd tests but confd was disabled at configure time")
    # NOTE(review): error-tracking setup (has_err) and the surrounding
    # try block are elided in this view
    self.BurnCreateInstances()
    if self.bep[constants.BE_MINMEM] < self.bep[constants.BE_MAXMEM]:
      self.BurnModifyRuntimeMemory()
    if opts.do_replace1 and opts.disk_template in constants.DTS_INT_MIRROR:
      self.BurnReplaceDisks1D8()
    # secondary-change needs at least a third node to move to
    if (opts.do_replace2 and len(self.nodes) > 2 and
        opts.disk_template in constants.DTS_INT_MIRROR):
      self.BurnReplaceDisks2()
    if (opts.disk_template in constants.DTS_GROWABLE and
        compat.any(n > 0 for n in self.disk_growth)):
      self.BurnGrowDisks()
    if opts.do_failover and opts.disk_template in constants.DTS_MIRRORED:
    if opts.disk_template not in constants.DTS_MIRRORED:
      Log("Skipping migration (disk template %s does not support it)",
    elif not self.hv_class.CAN_MIGRATE:
      Log("Skipping migration (hypervisor %s does not support it)",
    if (opts.do_move and len(self.nodes) > 1 and
        opts.disk_template in [constants.DT_PLAIN, constants.DT_FILE]):
    if (opts.do_importexport and
        opts.disk_template not in (constants.DT_DISKLESS,
                                   constants.DT_SHARED_FILE,
                                   constants.DT_FILE)):
      self.BurnImportExport()
    if opts.do_reinstall:
      self.BurnReinstall()
    if opts.do_addremove_disks:
      self.BurnAddRemoveDisks()
    default_nic_mode = self.cluster_default_nicparams[constants.NIC_MODE]
    # Don't add/remove nics in routed mode, as we would need an ip to add
    if opts.do_addremove_nics:
      if default_nic_mode == constants.NIC_MODE_BRIDGED:
        self.BurnAddRemoveNICs()
        # NOTE(review): the "else:" opener for this log line is elided
        Log("Skipping nic add/remove as the cluster is not in bridged mode")
    if opts.do_activate_disks:
      self.BurnActivateDisks()
    if opts.do_confd_tests:
    if opts.do_startstop:
      self.BurnStopStart()
    # on error, dump the accumulated opcode feedback for diagnosis
    Log("Error detected: opcode buffer follows:\n\n")
    Log(self.GetFeedbackBuf())
    # best-effort cleanup: instance removal failures after an earlier
    # error are expected and only logged
    if not self.opts.keep_instances:
    except Exception, err: # pylint: disable=W0703
      if has_err: # already detected errors, so errors in removal
        # are quite expected
        Log("Note: error detected during instance remove: %s", err)
      else: # non-expected error
    return constants.EXIT_SUCCESS
1124 utils.SetupLogging(pathutils.LOG_BURNIN, sys.argv[0],
1125 debug=False, stderr_logging=True)
1127 return Burner().BurninCluster()
1130 if __name__ == "__main__":