burnin: fix usage of diskless template
[ganeti-local] / tools / burnin
1 #!/usr/bin/python
2 #
3
4 # Copyright (C) 2006, 2007 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Burnin program
23
24 """
25
26 import os
27 import sys
28 import optparse
29 import time
30 from itertools import izip, islice, cycle
31 from cStringIO import StringIO
32
33 from ganeti import opcodes
34 from ganeti import mcpu
35 from ganeti import constants
36 from ganeti import cli
37 from ganeti import errors
38 from ganeti import utils
39
40
# Command-line synopsis printed by Usage(); -o/--os is the only mandatory
# option, followed by the instance names to burn in.
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
42
43
def Usage():
  """Print usage information on stderr and abort.

  Exits the program with status code 2, as is conventional for
  command-line usage errors.

  """
  sys.stderr.write("Usage:\n")
  sys.stderr.write(USAGE + "\n")
  sys.exit(2)
50
51
def Log(msg):
  """Write the argument to stdout, newline-terminated, and flush.

  The flush ensures progress messages appear immediately even when
  stdout is redirected to a file or pipe.

  """
  sys.stdout.write(str(msg) + "\n")
  sys.stdout.flush()
58
59
class Burner(object):
  """Burner class.

  Drives a sequence of stress-test operations (create, replace-disks,
  grow, failover, export/import, stop/start, rename, remove) against
  the instance names given on the command line, via the cluster's
  opcode/job interface.

  """

  def __init__(self):
    """Constructor."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    # buffer accumulating the feedback messages of the last executed
    # opcode/job set; see Feedback() and ClearFeedbackBuf()
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    # instances that currently exist and must be removed during cleanup
    self.to_rem = []
    self.opts = None
    self.cl = cli.GetClient()
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer.

    """
    # msg[0] appears to be a serialized timestamp (fed to
    # utils.MergeTime) and msg[2] the message text; exact tuple layout
    # is defined by the job feedback protocol — TODO confirm
    self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])),
                                      msg[2]))
    if self.opts.verbose:
      Log(msg)

  def ExecOp(self, op):
    """Execute an opcode and manage the exec buffer."""
    self.ClearFeedbackBuf()
    return cli.SubmitOpCode(op, feedback_fn=self.Feedback, cl=self.cl)

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
    # submit everything first, then poll, so the jobs run in parallel
    job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
    Log("- Submitted job IDs %s" % ", ".join(job_ids))
    results = []
    for jid in job_ids:
      Log("- Waiting for job %s" % jid)
      results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))

    return results

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--disk-size", dest="disk_size",
                      help="Disk size (determines disk count)",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--disk-growth", dest="disk_growth", help="Disk growth",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--mem-size", dest="mem_size", help="Memory size",
                      default=128, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
                      default=True)
    parser.add_option("--no-nics", dest="nics",
                      help="No network interfaces", action="store_const",
                      const=[], default=[{}])
    parser.add_option("--rename", dest="rename", default=None,
                      help="Give one unused instance name which is taken"
                           " to start the renaming sequence",
                      metavar="<instance_name>")
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("diskless", "file", "plain", "drbd"),
                      default="drbd",
                      help="Disk template (diskless, file, plain or drbd)"
                            " [drbd]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")
    parser.add_option("--iallocator", dest="iallocator",
                      default=None, type="string",
                      help="Perform the allocation using an iallocator"
                      " instead of fixed node spread (node restrictions no"
                      " longer apply, therefore -n/--nodes must not be used")
    parser.add_option("-p", "--parallel", default=False, action="store_true",
                      dest="parallel",
                      help="Enable parallelization of some operations in"
                      " order to speed burnin or to test granular locking")

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    if options.disk_template == constants.DT_DISKLESS:
      # diskless instances must be created without any disks, so both
      # the size and the growth lists are forced empty
      disk_size = disk_growth = []
    else:
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Log("Wrong disk sizes/growth combination")
        sys.exit(1)
    # sanity check: disks must be present exactly when the template
    # actually uses disks
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Log("Wrong disk count/disk template combination")
      sys.exit(1)

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Log("Give either the nodes option or the iallocator option, not both")
      sys.exit(1)

    self.opts = options
    self.instances = args
    # backend parameters used for all instance creations
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      # empty names list means "query all nodes"
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Log(msg)
      sys.exit(err_code)
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
                                              names=[]))

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # filter non-valid OS-es
    os_set = [val[0] for val in result if val[1]]

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
    # round-robin over the node list: each instance gets the "next"
    # node in the cycle as secondary of its primary
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    jobset = []

    for pnode, snode, instance in mytor:
      if self.opts.iallocator:
        # node selection is delegated to the iallocator
        pnode = snode = None
        Log("- Add instance %s (iallocator: %s)" %
              (instance, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        # non-mirrored templates (diskless/file/plain) take no secondary
        snode = None
        Log("- Add instance %s on node %s" % (instance, pnode))
      else:
        Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks = [ {"size": size}
                                              for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      if self.opts.parallel:
        jobset.append([op])
        # FIXME: here we should not append to to_rem unconditionally,
        # but only when the job is successful
        self.to_rem.append(instance)
      else:
        self.ExecOp(op)
        self.to_rem.append(instance)
    if self.opts.parallel:
      self.ExecJobSet(jobset)

  def GrowDisks(self):
    """Grow each instance disk by the requested amount, if any."""
    for instance in self.instances:
      for idx, growth in enumerate(self.disk_growth):
        if growth > 0:
          op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                  amount=growth, wait_for_sync=True)
          Log("- Increase %s's disk/%s by %s MB" % (instance, idx, growth))
          self.ExecOp(op)

  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    for instance in self.instances:
      # replace on the secondary first, then on the primary
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=[i for i in range(self.disk_count)])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace secondary node."""
    mode = constants.REPLACE_DISK_SEC

    # target node is two steps ahead in the node cycle, i.e. a node
    # different from both the primary and the current secondary
    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      if self.opts.iallocator:
        # let the iallocator pick the new secondary instead
        tnode = None
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[i for i in range(self.disk_count)])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances."""

    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % (instance))
      self.ExecOp(op)

  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    """

    # enode (two steps ahead in the cycle) is the node holding the
    # export while the instance is removed and re-imported
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("- Import instance %s from node %s (iallocator: %s)" %
                          (instance, enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        # non-mirrored templates take no secondary node
        snode = None
        import_log_msg = ("- Import instance %s from node %s to node %s" %
                          (instance, enode, pnode))
      else:
        import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
                          (instance, enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                           target_node=enode,
                                           shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      # the export directory is keyed on the instance's full name, so
      # resolve it before removing the instance
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                           names=[instance])
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks = [ {"size": size}
                                                  for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % (instance))
      self.ExecOp(rem_op)
      # the instance no longer exists, so take it off the cleanup list
      # until the import below recreates it
      self.to_rem.remove(instance)
      Log(import_log_msg)
      self.ExecOp(imp_op)
      Log("- Remove export of instance %s" % (instance))
      self.ExecOp(erem_op)

      self.to_rem.append(instance)

  def StopInstance(self, instance):
    """Stop given instance."""
    op = opcodes.OpShutdownInstance(instance_name=instance)
    Log("- Shutdown instance %s" % instance)
    self.ExecOp(op)

  def StartInstance(self, instance):
    """Start given instance."""
    op = opcodes.OpStartupInstance(instance_name=instance, force=False)
    Log("- Start instance %s" % instance)
    self.ExecOp(op)

  def RenameInstance(self, instance, instance_new):
    """Rename instance."""
    op = opcodes.OpRenameInstance(instance_name=instance,
                                  new_name=instance_new)
    Log("- Rename instance %s to %s" % (instance, instance_new))
    self.ExecOp(op)

  def StopStart(self):
    """Stop/start the instances."""
    for instance in self.instances:
      self.StopInstance(instance)
      self.StartInstance(instance)

  def Remove(self):
    """Remove the instances."""
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)


  def Rename(self):
    """Rename the instances.

    Each instance is renamed to the spare name given via --rename and
    then back to its original name, leaving the cluster unchanged.

    """
    rename = self.opts.rename
    for instance in self.instances:
      self.StopInstance(instance)
      self.RenameInstance(instance, rename)
      self.StartInstance(rename)
      self.StopInstance(rename)
      self.RenameInstance(rename, instance)
      self.StartInstance(instance)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("- Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Log("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")
      sys.exit(1)

    # pessimistic flag: only cleared once the whole sequence has run,
    # so any exception path dumps the feedback buffer in the finally
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.ReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR) :
        self.ReplaceDisks2()

      if opts.disk_template != constants.DT_DISKLESS:
        self.GrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.Failover()

      if opts.do_importexport:
        self.ImportExport()

      if opts.do_startstop:
        self.StopStart()

      if opts.rename:
        self.Rename()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      # always remove whatever instances we managed to create
      self.Remove()

    return 0
512
513
def main():
  """Entry point: build a Burner and run the full burnin sequence."""
  return Burner().BurninCluster()


if __name__ == "__main__":
  main()