4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
import os
import sys
import time
import optparse
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import mcpu
from ganeti import constants
from ganeti import cli
from ganeti import errors
from ganeti import utils
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")


def Usage():
  """Shows program usage information and exits the program."""
  # NOTE(review): the "def" header line was lost in this chunk; it is
  # restored here so the orphaned docstring and body are attached to a
  # function again.  The exit is required by the documented contract
  # ("exits the program").
  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)
53 """Simple function that prints out its argument.
    # NOTE(review): this chunk shows only part of the constructor — the
    # enclosing "def __init__" header and several statements (option
    # parsing, cluster-state setup) are on lines not visible here.
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    # Buffer accumulating per-opcode feedback messages (see Feedback()).
    self._feed_buf = StringIO()
    # Client used to submit jobs/opcodes to the master daemon.
    self.cl = cli.GetClient()
75 def ClearFeedbackBuf(self):
76 """Clear the feedback buffer."""
77 self._feed_buf.truncate(0)
79 def GetFeedbackBuf(self):
80 """Return the contents of the buffer."""
81 return self._feed_buf.getvalue()
  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
    # NOTE(review): the write() call below is cut off in this chunk — its
    # argument list continues on source lines that are not visible here.
    self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])),
91 """Execute an opcode and manage the exec buffer."""
92 self.ClearFeedbackBuf()
93 return cli.SubmitOpCode(op, feedback_fn=self.Feedback, cl=self.cl)
95 def ExecJobSet(self, jobs):
96 """Execute a set of jobs and return once all are done.
98 The method will return the list of results, if all jobs are
99 successfull. Otherwise, OpExecError will be raised from within
103 self.ClearFeedbackBuf()
104 job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
105 Log("- Submitted job IDs %s" % ", ".join(job_ids))
108 Log("- Waiting for job %s" % jid)
109 results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """
    # NOTE(review): several add_option() calls below are truncated in this
    # chunk (missing closing arguments/parentheses), and the error-exit
    # statements that follow the Log() messages are on lines not visible
    # here; the code in this block is incomplete as shown.
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
    parser.add_option("--disk-size", dest="disk_size",
                      help="Disk size (determines disk count)",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--disk-growth", dest="disk_growth", help="Disk growth",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--mem-size", dest="mem_size", help="Memory size",
                      default=128, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
    parser.add_option("--no-reinstall", dest="do_reinstall",
                      help="Skip instance reinstall", action="store_false",
    parser.add_option("--no-reboot", dest="do_reboot",
                      help="Skip instance reboot", action="store_false",
    parser.add_option("--no-nics", dest="nics",
                      help="No network interfaces", action="store_const",
                      const=[], default=[{}])
    parser.add_option("--rename", dest="rename", default=None,
                      help="Give one unused instance name which is taken"
                      " to start the renaming sequence",
                      metavar="<instance_name>")
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("diskless", "file", "plain", "drbd"),
                      help="Disk template (diskless, file, plain or drbd)"
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")
    parser.add_option("--iallocator", dest="iallocator",
                      default=None, type="string",
                      help="Perform the allocation using an iallocator"
                      " instead of fixed node spread (node restrictions no"
                      " longer apply, therefore -n/--nodes must not be used")
    parser.add_option("-p", "--parallel", default=False, action="store_true",
                      help="Enable parallelization of some operations in"
                      " order to speed burnin or to test granular locking")

    options, args = parser.parse_args()
    # Validation: each failed check below is followed (on lines not shown)
    # by a usage message / program exit.
    if len(args) < 1 or options.os is None:
    supported_disk_templates = (constants.DT_DISKLESS,
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Log("Wrong disk sizes/growth combination")
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Log("Wrong disk count/disk template combination")

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Log("Give either the nodes option or the iallocator option, not both")

    self.instances = args
    # NOTE(review): the two dict entries below belong to a backend-parameters
    # dict whose opening line is not visible in this chunk.
        constants.BE_MEMORY: options.mem_size,
        constants.BE_VCPUS: 1,
    """Read the cluster state from the config."""
    # NOTE(review): the enclosing "def" header, the try/else structure
    # around the node query, and the error-exit statements after the Log()
    # calls are on lines not visible in this chunk.
    names = self.opts.nodes.split(",")
    op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
    result = self.ExecOp(op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
      Log("Can't get the OS list")

    # keep only the OS entries the cluster reports as valid
    os_set = [val[0] for val in result if val[1]]

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
  def CreateInstances(self):
    """Create the given instances.

    """
    # NOTE(review): this chunk omits parts of this method — the job-set
    # setup, several OpCreateInstance keyword arguments and the
    # submit/execute statements for both the parallel and serial paths.
    # Walk primary/secondary node pairs round-robin over the node list,
    # pairing each instance with consecutive nodes.
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
    for pnode, snode, instance in mytor:
      if self.opts.iallocator:
        Log("- Add instance %s (iallocator: %s)" %
            (instance, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        Log("- Add instance %s on node %s" % (instance, pnode))
        Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks = [ {"size": size}
                                              for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,

      if self.opts.parallel:
        # FIXME: here we should not append to to_rem unconditionally,
        # but only when the job is successful
        self.to_rem.append(instance)
        self.to_rem.append(instance)
    if self.opts.parallel:
      self.ExecJobSet(jobset)
    """Grow both the os and the swap disks by the requested amount, if any."""
    # NOTE(review): the enclosing "def" header, a guard around zero growth,
    # and the self.ExecOp(op) call are on lines not visible in this chunk.
    for instance in self.instances:
      for idx, growth in enumerate(self.disk_growth):
        op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                amount=growth, wait_for_sync=True)
        Log("- Increase %s's disk/%s by %s MB" % (instance, idx, growth))
  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    # NOTE(review): the mode= keyword argument of OpReplaceDisks and the
    # self.ExecOp(op) call are on lines not visible in this chunk.
    for instance in self.instances:
      # Run a replace pass for the secondary, then the primary.
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    disks=[i for i in range(self.disk_count)])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
  def ReplaceDisks2(self):
    """Replace secondary node."""
    # NOTE(review): parts of this method — the remainder of the izip()
    # arguments, the non-iallocator branch of the conditional, and the
    # self.ExecOp(op) call — are on lines not visible in this chunk.
    mode = constants.REPLACE_DISK_SEC

    mytor = izip(islice(cycle(self.nodes), 2, None),
    for tnode, instance in mytor:
      if self.opts.iallocator:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    iallocator=self.opts.iallocator,
                                    disks=[i for i in range(self.disk_count)])
        Log("- Replace secondary (%s) for instance %s" % (mode, instance))
    """Failover the instances."""
    # NOTE(review): the enclosing "def" header and the self.ExecOp(op) call
    # are on lines not visible in this chunk.
    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % (instance))
  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    # NOTE(review): this chunk omits parts of this method — several opcode
    # keyword arguments, the submit/execute statements interleaved with the
    # Log() calls, and pieces of the surrounding control flow.  The code
    # below is incomplete as shown.
    # Round-robin over nodes: primary, secondary and export-target node.
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
    for pnode, snode, enode, instance in mytor:

      if self.opts.iallocator:
        import_log_msg = ("- Import instance %s from node %s"
                          " (iallocator: %s)" %
                          (instance, enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        import_log_msg = ("- Import instance %s from node %s to node %s" %
                          (instance, enode, pnode))
        import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
                          (instance, enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
      # Resolve the (possibly shortened) instance name to its full name,
      # which determines the on-disk export directory.
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks = [ {"size": size}
                                                  for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        mode=constants.INSTANCE_IMPORT,
                                        file_storage_dir=None,
                                        iallocator=self.opts.iallocator,

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("- Export instance %s to node %s" % (instance, enode))
      Log("- Remove instance %s" % (instance))
      self.to_rem.remove(instance)
      Log("- Remove export of instance %s" % (instance))
      self.to_rem.append(instance)
422 def StopInstance(self, instance):
423 """Stop given instance."""
424 op = opcodes.OpShutdownInstance(instance_name=instance)
425 Log("- Shutdown instance %s" % instance)
428 def StartInstance(self, instance):
429 """Start given instance."""
430 op = opcodes.OpStartupInstance(instance_name=instance, force=False)
431 Log("- Start instance %s" % instance)
434 def RenameInstance(self, instance, instance_new):
435 """Rename instance."""
436 op = opcodes.OpRenameInstance(instance_name=instance,
437 new_name=instance_new)
438 Log("- Rename instance %s to %s" % (instance, instance_new))
    """Stop/start the instances."""
    # NOTE(review): the enclosing "def" header of this method is not
    # visible in this chunk.
    for instance in self.instances:
      self.StopInstance(instance)
      self.StartInstance(instance)
    """Remove the instances."""
    # NOTE(review): the enclosing "def" header and the self.ExecOp(op) call
    # are on lines not visible in this chunk.
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      Log("- Remove instance %s" % instance)
    """Rename the instances."""
    # NOTE(review): the enclosing "def" header of this method is not
    # visible in this chunk.
    rename = self.opts.rename
    for instance in self.instances:
      # Round trip: stop, rename to the spare name, start, stop, rename
      # back, start — exercising the rename in both directions.
      self.StopInstance(instance)
      self.RenameInstance(instance, rename)
      self.StartInstance(rename)
      self.StopInstance(rename)
      self.RenameInstance(rename, instance)
      self.StartInstance(instance)
    """Reinstall the instances."""
    # NOTE(review): the enclosing "def" header and the self.ExecOp(op)
    # calls after each opcode are on lines not visible in this chunk.
    for instance in self.instances:
      self.StopInstance(instance)
      # Reinstall twice: once relying on the instance's recorded OS, once
      # passing the OS explicitly.
      op = opcodes.OpReinstallInstance(instance_name=instance)
      Log("- Reinstall instance %s without passing the OS" % (instance,))
      op = opcodes.OpReinstallInstance(instance_name=instance,
                                       os_type=self.opts.os)
      Log("- Reinstall instance %s specifying the OS" % (instance,))
      self.StartInstance(instance)
    """Reboot the instances with each supported reboot type."""
    # NOTE(review): the enclosing "def" header and the self.ExecOp(op) call
    # are on lines not visible in this chunk.  The original docstring said
    # "Reinstall the instances." — a copy-paste slip: this loop issues
    # OpRebootInstance.
    for instance in self.instances:
      for reboot_type in constants.REBOOT_TYPES:
        op = opcodes.OpRebootInstance(instance_name=instance,
                                      reboot_type=reboot_type,
                                      ignore_secondaries=False)
        Log("- Reboot instance %s with type '%s'" % (instance, reboot_type))
  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """
    # NOTE(review): large parts of this method are on lines not visible in
    # this chunk — the binding of `opts`, the exit/return statements after
    # the Log() messages, the bodies of several `if` tests (each invokes
    # the corresponding test method), and the try/finally that dumps the
    # feedback buffer on error.  The code below is incomplete as shown.
    Log("- Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
      Log("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")

    self.CreateInstances()
    if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
      self.ReplaceDisks1D8()
    if (opts.do_replace2 and len(self.nodes) > 2 and
        opts.disk_template in constants.DTS_NET_MIRROR) :
    if opts.disk_template != constants.DT_DISKLESS:
    if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
    if (opts.do_importexport and
        opts.disk_template not in (constants.DT_DISKLESS,
    if opts.do_reinstall:
    if opts.do_startstop:
      Log("Error detected: opcode buffer follows:\n\n")
      Log(self.GetFeedbackBuf())
  # NOTE(review): the surrounding main() definition (and the body of the
  # __main__ guard below) are on lines not visible in this chunk.
  return burner.BurninCluster()


if __name__ == "__main__":