Small burnin info message change
[ganeti-local] / tools / burnin
1 #!/usr/bin/python
2 #
3
4 # Copyright (C) 2006, 2007 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Burnin program
23
24 """
25
26 import os
27 import sys
28 import optparse
29 import time
30 from itertools import izip, islice, cycle
31 from cStringIO import StringIO
32
33 from ganeti import opcodes
34 from ganeti import mcpu
35 from ganeti import constants
36 from ganeti import cli
37 from ganeti import errors
38 from ganeti import utils
39
40
41 USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
42
43
def Usage():
  """Print the usage synopsis to stderr and exit with code 2."""
  sys.stderr.write("Usage:\n")
  sys.stderr.write(USAGE + "\n")
  sys.exit(2)
50
51
def Log(msg):
  """Write the argument to stdout, followed by a newline, and flush.

  The flush keeps the burnin progress visible in real time even when
  stdout is redirected to a pipe or file.

  """
  sys.stdout.write("%s\n" % (msg,))
  sys.stdout.flush()
58
59
60 class Burner(object):
61   """Burner class."""
62
63   def __init__(self):
64     """Constructor."""
65     utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
66     self._feed_buf = StringIO()
67     self.nodes = []
68     self.instances = []
69     self.to_rem = []
70     self.opts = None
71     self.cl = cli.GetClient()
72     self.ParseOptions()
73     self.GetState()
74
75   def ClearFeedbackBuf(self):
76     """Clear the feedback buffer."""
77     self._feed_buf.truncate(0)
78
79   def GetFeedbackBuf(self):
80     """Return the contents of the buffer."""
81     return self._feed_buf.getvalue()
82
83   def Feedback(self, msg):
84     """Acumulate feedback in our buffer."""
85     self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])),
86                                       msg[2]))
87     if self.opts.verbose:
88       Log(msg)
89
90   def ExecOp(self, op):
91     """Execute an opcode and manage the exec buffer."""
92     self.ClearFeedbackBuf()
93     return cli.SubmitOpCode(op, feedback_fn=self.Feedback, cl=self.cl)
94
95   def ExecJobSet(self, jobs):
96     """Execute a set of jobs and return once all are done.
97
98     The method will return the list of results, if all jobs are
99     successfull. Otherwise, OpExecError will be raised from within
100     cli.py.
101
102     """
103     self.ClearFeedbackBuf()
104     job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
105     Log("- Submitted job IDs %s" % ", ".join(job_ids))
106     results = []
107     for jid in job_ids:
108       Log("- Waiting for job %s" % jid)
109       results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
110
111     return results
112
113   def ParseOptions(self):
114     """Parses the command line options.
115
116     In case of command line errors, it will show the usage and exit the
117     program.
118
119     """
120
121     parser = optparse.OptionParser(usage="\n%s" % USAGE,
122                                    version="%%prog (ganeti) %s" %
123                                    constants.RELEASE_VERSION,
124                                    option_class=cli.CliOption)
125
126     parser.add_option("-o", "--os", dest="os", default=None,
127                       help="OS to use during burnin",
128                       metavar="<OS>")
129     parser.add_option("--disk-size", dest="disk_size",
130                       help="Disk size (determines disk count)",
131                       default="128m", type="string", metavar="<size,size,...>")
132     parser.add_option("--disk-growth", dest="disk_growth", help="Disk growth",
133                       default="128m", type="string", metavar="<size,size,...>")
134     parser.add_option("--mem-size", dest="mem_size", help="Memory size",
135                       default=128, type="unit", metavar="<size>")
136     parser.add_option("-v", "--verbose",
137                       action="store_true", dest="verbose", default=False,
138                       help="print command execution messages to stdout")
139     parser.add_option("--no-replace1", dest="do_replace1",
140                       help="Skip disk replacement with the same secondary",
141                       action="store_false", default=True)
142     parser.add_option("--no-replace2", dest="do_replace2",
143                       help="Skip disk replacement with a different secondary",
144                       action="store_false", default=True)
145     parser.add_option("--no-failover", dest="do_failover",
146                       help="Skip instance failovers", action="store_false",
147                       default=True)
148     parser.add_option("--no-importexport", dest="do_importexport",
149                       help="Skip instance export/import", action="store_false",
150                       default=True)
151     parser.add_option("--no-startstop", dest="do_startstop",
152                       help="Skip instance stop/start", action="store_false",
153                       default=True)
154     parser.add_option("--rename", dest="rename", default=None,
155                       help="Give one unused instance name which is taken"
156                            " to start the renaming sequence",
157                       metavar="<instance_name>")
158     parser.add_option("-t", "--disk-template", dest="disk_template",
159                       choices=("diskless", "file", "plain", "drbd"),
160                       default="drbd",
161                       help="Disk template (diskless, file, plain or drbd)"
162                             " [drbd]")
163     parser.add_option("-n", "--nodes", dest="nodes", default="",
164                       help="Comma separated list of nodes to perform"
165                       " the burnin on (defaults to all nodes)")
166     parser.add_option("--iallocator", dest="iallocator",
167                       default=None, type="string",
168                       help="Perform the allocation using an iallocator"
169                       " instead of fixed node spread (node restrictions no"
170                       " longer apply, therefore -n/--nodes must not be used")
171     parser.add_option("-p", "--parallel", default=False, action="store_true",
172                       dest="parallel",
173                       help="Enable parallelization of some operations in"
174                       " order to speed burnin or to test granular locking")
175
176     options, args = parser.parse_args()
177     if len(args) < 1 or options.os is None:
178       Usage()
179
180     supported_disk_templates = (constants.DT_DISKLESS,
181                                 constants.DT_FILE,
182                                 constants.DT_PLAIN,
183                                 constants.DT_DRBD8)
184     if options.disk_template not in supported_disk_templates:
185       Log("Unknown disk template '%s'" % options.disk_template)
186       sys.exit(1)
187
188     disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
189     disk_growth = [utils.ParseUnit(v) for v in options.disk_growth.split(",")]
190     if len(disk_growth) != len(disk_size):
191       Log("Wrong disk sizes/growth combination")
192       sys.exit(1)
193     if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
194         (not disk_size and options.disk_template != constants.DT_DISKLESS)):
195       Log("Wrong disk count/disk template combination")
196       sys.exit(1)
197
198     self.disk_size = disk_size
199     self.disk_growth = disk_growth
200     self.disk_count = len(disk_size)
201
202     if options.nodes and options.iallocator:
203       Log("Give either the nodes option or the iallocator option, not both")
204       sys.exit(1)
205
206     self.opts = options
207     self.instances = args
208     self.bep = {
209       constants.BE_MEMORY: options.mem_size,
210       constants.BE_VCPUS: 1,
211       }
212     self.hvp = {}
213
214   def GetState(self):
215     """Read the cluster state from the config."""
216     if self.opts.nodes:
217       names = self.opts.nodes.split(",")
218     else:
219       names = []
220     try:
221       op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
222       result = self.ExecOp(op)
223     except errors.GenericError, err:
224       err_code, msg = cli.FormatError(err)
225       Log(msg)
226       sys.exit(err_code)
227     self.nodes = [data[0] for data in result]
228
229     result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
230                                               names=[]))
231
232     if not result:
233       Log("Can't get the OS list")
234       sys.exit(1)
235
236     # filter non-valid OS-es
237     os_set = [val[0] for val in result if val[1]]
238
239     if self.opts.os not in os_set:
240       Log("OS '%s' not found" % self.opts.os)
241       sys.exit(1)
242
243   def CreateInstances(self):
244     """Create the given instances.
245
246     """
247     self.to_rem = []
248     mytor = izip(cycle(self.nodes),
249                  islice(cycle(self.nodes), 1, None),
250                  self.instances)
251     jobset = []
252
253     for pnode, snode, instance in mytor:
254       if self.opts.iallocator:
255         pnode = snode = None
256         Log("- Add instance %s (iallocator: %s)" %
257               (instance, self.opts.iallocator))
258       elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
259         snode = None
260         Log("- Add instance %s on node %s" % (instance, pnode))
261       else:
262         Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))
263
264       op = opcodes.OpCreateInstance(instance_name=instance,
265                                     disks = [ {"size": size}
266                                               for size in self.disk_size],
267                                     disk_template=self.opts.disk_template,
268                                     nics=[{}],
269                                     mode=constants.INSTANCE_CREATE,
270                                     os_type=self.opts.os,
271                                     pnode=pnode,
272                                     snode=snode,
273                                     start=True,
274                                     ip_check=True,
275                                     wait_for_sync=True,
276                                     file_driver="loop",
277                                     file_storage_dir=None,
278                                     iallocator=self.opts.iallocator,
279                                     beparams=self.bep,
280                                     hvparams=self.hvp,
281                                     )
282
283       if self.opts.parallel:
284         jobset.append([op])
285         # FIXME: here we should not append to to_rem uncoditionally,
286         # but only when the job is successful
287         self.to_rem.append(instance)
288       else:
289         self.ExecOp(op)
290         self.to_rem.append(instance)
291     if self.opts.parallel:
292       self.ExecJobSet(jobset)
293
294   def GrowDisks(self):
295     """Grow both the os and the swap disks by the requested amount, if any."""
296     for instance in self.instances:
297       for idx, growth in enumerate(self.disk_growth):
298         if growth > 0:
299           op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
300                                   amount=growth, wait_for_sync=True)
301           Log("- Increase %s's disk/%s by %s MB" % (instance, idx, growth))
302           self.ExecOp(op)
303
304   def ReplaceDisks1D8(self):
305     """Replace disks on primary and secondary for drbd8."""
306     for instance in self.instances:
307       for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
308         op = opcodes.OpReplaceDisks(instance_name=instance,
309                                     mode=mode,
310                                     disks=[i for i in range(self.disk_count)])
311         Log("- Replace disks (%s) for instance %s" % (mode, instance))
312         self.ExecOp(op)
313
314   def ReplaceDisks2(self):
315     """Replace secondary node."""
316     mode = constants.REPLACE_DISK_SEC
317
318     mytor = izip(islice(cycle(self.nodes), 2, None),
319                  self.instances)
320     for tnode, instance in mytor:
321       if self.opts.iallocator:
322         tnode = None
323       op = opcodes.OpReplaceDisks(instance_name=instance,
324                                   mode=mode,
325                                   remote_node=tnode,
326                                   iallocator=self.opts.iallocator,
327                                   disks=[i for i in range(self.disk_count)])
328       Log("- Replace secondary (%s) for instance %s" % (mode, instance))
329       self.ExecOp(op)
330
331   def Failover(self):
332     """Failover the instances."""
333
334     for instance in self.instances:
335       op = opcodes.OpFailoverInstance(instance_name=instance,
336                                       ignore_consistency=False)
337
338       Log("- Failover instance %s" % (instance))
339       self.ExecOp(op)
340
341   def ImportExport(self):
342     """Export the instance, delete it, and import it back.
343
344     """
345
346     mytor = izip(cycle(self.nodes),
347                  islice(cycle(self.nodes), 1, None),
348                  islice(cycle(self.nodes), 2, None),
349                  self.instances)
350
351     for pnode, snode, enode, instance in mytor:
352
353       if self.opts.iallocator:
354         pnode = snode = None
355         import_log_msg = ("- Import instance %s from node %s (iallocator: %s)" %
356                           (instance, enode, self.opts.iallocator))
357       elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
358         snode = None
359         import_log_msg = ("- Import instance %s from node %s to node %s" %
360                           (instance, enode, pnode))
361       else:
362         import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
363                           (instance, enode, pnode, snode))
364
365       exp_op = opcodes.OpExportInstance(instance_name=instance,
366                                            target_node=enode,
367                                            shutdown=True)
368       rem_op = opcodes.OpRemoveInstance(instance_name=instance,
369                                         ignore_failures=True)
370       nam_op = opcodes.OpQueryInstances(output_fields=["name"],
371                                            names=[instance])
372       full_name = self.ExecOp(nam_op)[0][0]
373       imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
374       imp_op = opcodes.OpCreateInstance(instance_name=instance,
375                                         disk_size=self.opts.os_size,
376                                         swap_size=self.opts.swap_size,
377                                         disk_template=self.opts.disk_template,
378                                         mode=constants.INSTANCE_IMPORT,
379                                         src_node=enode,
380                                         src_path=imp_dir,
381                                         pnode=pnode,
382                                         snode=snode,
383                                         start=True,
384                                         ip_check=True,
385                                         wait_for_sync=True,
386                                         mac="auto",
387                                         file_storage_dir=None,
388                                         file_driver=None,
389                                         iallocator=self.opts.iallocator,
390                                         beparams=self.bep,
391                                         hvparams=self.hvp,
392                                         )
393
394       erem_op = opcodes.OpRemoveExport(instance_name=instance)
395
396       Log("- Export instance %s to node %s" % (instance, enode))
397       self.ExecOp(exp_op)
398       Log("- Remove instance %s" % (instance))
399       self.ExecOp(rem_op)
400       self.to_rem.remove(instance)
401       Log(import_log_msg)
402       self.ExecOp(imp_op)
403       Log("- Remove export of instance %s" % (instance))
404       self.ExecOp(erem_op)
405
406       self.to_rem.append(instance)
407
408   def StopInstance(self, instance):
409     """Stop given instance."""
410     op = opcodes.OpShutdownInstance(instance_name=instance)
411     Log("- Shutdown instance %s" % instance)
412     self.ExecOp(op)
413
414   def StartInstance(self, instance):
415     """Start given instance."""
416     op = opcodes.OpStartupInstance(instance_name=instance, force=False)
417     Log("- Start instance %s" % instance)
418     self.ExecOp(op)
419
420   def RenameInstance(self, instance, instance_new):
421     """Rename instance."""
422     op = opcodes.OpRenameInstance(instance_name=instance,
423                                   new_name=instance_new)
424     Log("- Rename instance %s to %s" % (instance, instance_new))
425     self.ExecOp(op)
426
427   def StopStart(self):
428     """Stop/start the instances."""
429     for instance in self.instances:
430       self.StopInstance(instance)
431       self.StartInstance(instance)
432
433   def Remove(self):
434     """Remove the instances."""
435     for instance in self.to_rem:
436       op = opcodes.OpRemoveInstance(instance_name=instance,
437                                     ignore_failures=True)
438       Log("- Remove instance %s" % instance)
439       self.ExecOp(op)
440
441
442   def Rename(self):
443     """Rename the instances."""
444     rename = self.opts.rename
445     for instance in self.instances:
446       self.StopInstance(instance)
447       self.RenameInstance(instance, rename)
448       self.StartInstance(rename)
449       self.StopInstance(rename)
450       self.RenameInstance(rename, instance)
451       self.StartInstance(instance)
452
453   def BurninCluster(self):
454     """Test a cluster intensively.
455
456     This will create instances and then start/stop/failover them.
457     It is safe for existing instances but could impact performance.
458
459     """
460
461     opts = self.opts
462
463     Log("- Testing global parameters")
464
465     if (len(self.nodes) == 1 and
466         opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
467                                    constants.DT_FILE)):
468       Log("When one node is available/selected the disk template must"
469           " be 'diskless', 'file' or 'plain'")
470       sys.exit(1)
471
472     has_err = True
473     try:
474       self.CreateInstances()
475       if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
476         self.ReplaceDisks1D8()
477       if (opts.do_replace2 and len(self.nodes) > 2 and
478           opts.disk_template in constants.DTS_NET_MIRROR) :
479         self.ReplaceDisks2()
480
481       if opts.disk_template != constants.DT_DISKLESS:
482         self.GrowDisks()
483
484       if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
485         self.Failover()
486
487       if opts.do_importexport:
488         self.ImportExport()
489
490       if opts.do_startstop:
491         self.StopStart()
492
493       if opts.rename:
494         self.Rename()
495
496       has_err = False
497     finally:
498       if has_err:
499         Log("Error detected: opcode buffer follows:\n\n")
500         Log(self.GetFeedbackBuf())
501         Log("\n\n")
502       self.Remove()
503
504     return 0
505
506
def main():
  """Main function: run a full burnin and return its exit code."""
  return Burner().BurninCluster()
512
513
if __name__ == "__main__":
  # propagate main()'s return value as the process exit code; the
  # previous bare main() call silently discarded it
  sys.exit(main())