burnin: improve -t help message
[ganeti-local] / tools / burnin
1 #!/usr/bin/python
2 #
3
4 # Copyright (C) 2006, 2007 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Burnin program
23
24 """
25
26 import os
27 import sys
28 import optparse
29 from itertools import izip, islice, cycle
30 from cStringIO import StringIO
31
32 from ganeti import opcodes
33 from ganeti import mcpu
34 from ganeti import constants
35 from ganeti import cli
36 from ganeti import logger
37 from ganeti import errors
38 from ganeti import utils
39
40
# One-line usage summary: printed by Usage() and handed to the option parser.
USAGE = "\tburnin -o OS_NAME [options...] instance_name ..."
42
43
def Usage():
  """Write the usage text to stderr and terminate with exit status 2."""

  sys.stderr.write("Usage:\n")
  sys.stderr.write("%s\n" % USAGE)
  sys.exit(2)
50
51
def Log(msg):
  """Print the given message on stdout and flush the stream immediately.

  """
  out = sys.stdout
  print >> out, msg
  out.flush()
58
59
60 class Burner(object):
61   """Burner class."""
62
63   def __init__(self):
64     """Constructor."""
65     logger.SetupLogging(debug=False, program="ganeti/burnin")
66     self._feed_buf = StringIO()
67     self.proc = mcpu.Processor(feedback=self.Feedback)
68     self.nodes = []
69     self.instances = []
70     self.to_rem = []
71     self.opts = None
72     self.ParseOptions()
73     self.GetState()
74
75   def ClearFeedbackBuf(self):
76     """Clear the feedback buffer."""
77     self._feed_buf.truncate(0)
78
79   def GetFeedbackBuf(self):
80     """Return the contents of the buffer."""
81     return self._feed_buf.getvalue()
82
83   def Feedback(self, msg):
84     """Acumulate feedback in our buffer."""
85     self._feed_buf.write(msg)
86     self._feed_buf.write("\n")
87     if self.opts.verbose:
88       Log(msg)
89
90   def ExecOp(self, op):
91     """Execute an opcode and manage the exec buffer."""
92     self.ClearFeedbackBuf()
93     return self.proc.ExecOpCode(op)
94
95   def ParseOptions(self):
96     """Parses the command line options.
97
98     In case of command line errors, it will show the usage and exit the
99     program.
100
101     """
102
103     parser = optparse.OptionParser(usage="\n%s" % USAGE,
104                                    version="%%prog (ganeti) %s" %
105                                    constants.RELEASE_VERSION,
106                                    option_class=cli.CliOption)
107
108     parser.add_option("-o", "--os", dest="os", default=None,
109                       help="OS to use during burnin",
110                       metavar="<OS>")
111     parser.add_option("--os-size", dest="os_size", help="Disk size",
112                       default=4 * 1024, type="unit", metavar="<size>")
113     parser.add_option("--swap-size", dest="swap_size", help="Swap size",
114                       default=4 * 1024, type="unit", metavar="<size>")
115     parser.add_option("--mem-size", dest="mem_size", help="Memory size",
116                       default=128, type="unit", metavar="<size>")
117     parser.add_option("-v", "--verbose",
118                       action="store_true", dest="verbose", default=False,
119                       help="print command execution messages to stdout")
120     parser.add_option("--no-replace1", dest="do_replace1",
121                       help="Skip disk replacement with the same secondary",
122                       action="store_false", default=True)
123     parser.add_option("--no-replace2", dest="do_replace2",
124                       help="Skip disk replacement with a different secondary",
125                       action="store_false", default=True)
126     parser.add_option("--no-failover", dest="do_failover",
127                       help="Skip instance failovers", action="store_false",
128                       default=True)
129     parser.add_option("--no-importexport", dest="do_importexport",
130                       help="Skip instance export/import", action="store_false",
131                       default=True)
132     parser.add_option("--no-startstop", dest="do_startstop",
133                       help="Skip instance stop/start", action="store_false",
134                       default=True)
135     parser.add_option("-t", "--disk-template", dest="disk_template",
136                       choices=("diskless", "plain", "remote_raid1", "drbd"),
137                       default="drbd",
138                       help="Disk template (diskless, plain, drbd, remote_raid1)"
139                       " [drbd]")
140     parser.add_option("-n", "--nodes", dest="nodes", default="",
141                       help="Comma separated list of nodes to perform"
142                       " the burnin on (defaults to all nodes)")
143     parser.add_option("--iallocator", dest="iallocator",
144                       default=None, type="string",
145                       help="Perform the allocation using an iallocator"
146                       " instead of fixed node spread (node restrictions no"
147                       " longer apply, therefore -n/--nodes must not be used")
148
149     options, args = parser.parse_args()
150     if len(args) < 1 or options.os is None:
151       Usage()
152
153     supported_disk_templates = (constants.DT_DISKLESS, constants.DT_PLAIN,
154                                 constants.DT_REMOTE_RAID1,
155                                 constants.DT_DRBD8)
156     if options.disk_template not in supported_disk_templates:
157       Log("Unknown disk template '%s'" % options.disk_template)
158       sys.exit(1)
159
160     if options.nodes and options.iallocator:
161       Log("Give either the nodes option or the iallocator option, not both")
162       sys.exit(1)
163
164     self.opts = options
165     self.instances = args
166
167   def GetState(self):
168     """Read the cluster state from the config."""
169     if self.opts.nodes:
170       names = self.opts.nodes.split(",")
171     else:
172       names = []
173     try:
174       op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
175       result = self.ExecOp(op)
176     except errors.GenericError, err:
177       err_code, msg = cli.FormatError(err)
178       Log(msg)
179       sys.exit(err_code)
180     self.nodes = [data[0] for data in result]
181
182     result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
183                                               names=[]))
184
185     if not result:
186       Log("Can't get the OS list")
187       sys.exit(1)
188
189     # filter non-valid OS-es
190     os_set = [val[0] for val in result if val[1]]
191
192     if self.opts.os not in os_set:
193       Log("OS '%s' not found" % self.opts.os)
194       sys.exit(1)
195
196   def CreateInstances(self):
197     """Create the given instances.
198
199     """
200     self.to_rem = []
201     mytor = izip(cycle(self.nodes),
202                  islice(cycle(self.nodes), 1, None),
203                  self.instances)
204     for pnode, snode, instance in mytor:
205       if self.opts.iallocator:
206         pnode = snode = None
207         Log("- Add instance %s (iallocator: %s)" %
208               (instance, self.opts.iallocator))
209       elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
210         snode = None
211         Log("- Add instance %s on node %s" % (instance, pnode))
212       else:
213         Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))
214
215       op = opcodes.OpCreateInstance(instance_name=instance,
216                                     mem_size=self.opts.mem_size,
217                                     disk_size=self.opts.os_size,
218                                     swap_size=self.opts.swap_size,
219                                     disk_template=self.opts.disk_template,
220                                     mode=constants.INSTANCE_CREATE,
221                                     os_type=self.opts.os,
222                                     pnode=pnode,
223                                     snode=snode,
224                                     vcpus=1,
225                                     start=True,
226                                     ip_check=True,
227                                     wait_for_sync=True,
228                                     mac="auto",
229                                     kernel_path=None,
230                                     initrd_path=None,
231                                     hvm_boot_order=None,
232                                     iallocator=self.opts.iallocator)
233       self.ExecOp(op)
234       self.to_rem.append(instance)
235
236   def ReplaceDisks1R1(self):
237     """Replace disks with the same secondary for rr1."""
238     # replace all, both disks
239     for instance in self.instances:
240       op = opcodes.OpReplaceDisks(instance_name=instance,
241                                   remote_node=None,
242                                   mode=constants.REPLACE_DISK_ALL,
243                                   disks=["sda", "sdb"])
244
245       Log("- Replace disks for instance %s" % (instance))
246       self.ExecOp(op)
247
248   def ReplaceDisks1D8(self):
249     """Replace disks on primary and secondary for drbd8."""
250     for instance in self.instances:
251       for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
252         op = opcodes.OpReplaceDisks(instance_name=instance,
253                                     mode=mode,
254                                     disks=["sda", "sdb"])
255         Log("- Replace disks (%s) for instance %s" % (mode, instance))
256         self.ExecOp(op)
257
258   def ReplaceDisks2(self):
259     """Replace secondary node."""
260     if self.opts.disk_template == constants.DT_REMOTE_RAID1:
261       mode = constants.REPLACE_DISK_ALL
262     else:
263       mode = constants.REPLACE_DISK_SEC
264
265     mytor = izip(islice(cycle(self.nodes), 2, None),
266                  self.instances)
267     for tnode, instance in mytor:
268       if self.opts.iallocator:
269         tnode = None
270       op = opcodes.OpReplaceDisks(instance_name=instance,
271                                   mode=mode,
272                                   remote_node=tnode,
273                                   iallocator=self.opts.iallocator,
274                                   disks=["sda", "sdb"])
275       Log("- Replace secondary (%s) for instance %s" % (mode, instance))
276       self.ExecOp(op)
277
278   def Failover(self):
279     """Failover the instances."""
280
281     for instance in self.instances:
282       op = opcodes.OpFailoverInstance(instance_name=instance,
283                                       ignore_consistency=False)
284
285       Log("- Failover instance %s" % (instance))
286       self.ExecOp(op)
287
288   def ImportExport(self):
289     """Export the instance, delete it, and import it back.
290
291     """
292
293     mytor = izip(cycle(self.nodes),
294                  islice(cycle(self.nodes), 1, None),
295                  islice(cycle(self.nodes), 2, None),
296                  self.instances)
297
298     for pnode, snode, enode, instance in mytor:
299       exp_op = opcodes.OpExportInstance(instance_name=instance,
300                                            target_node=enode,
301                                            shutdown=True)
302       rem_op = opcodes.OpRemoveInstance(instance_name=instance)
303       nam_op = opcodes.OpQueryInstances(output_fields=["name"],
304                                            names=[instance])
305       full_name = self.ExecOp(nam_op)[0][0]
306       imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
307       imp_op = opcodes.OpCreateInstance(instance_name=instance,
308                                         mem_size=128,
309                                         disk_size=self.opts.os_size,
310                                         swap_size=self.opts.swap_size,
311                                         disk_template=self.opts.disk_template,
312                                         mode=constants.INSTANCE_IMPORT,
313                                         src_node=enode,
314                                         src_path=imp_dir,
315                                         pnode=pnode,
316                                         snode=snode,
317                                         vcpus=1,
318                                         start=True,
319                                         ip_check=True,
320                                         wait_for_sync=True,
321                                         mac="auto")
322       erem_op = opcodes.OpRemoveExport(instance_name=instance)
323
324       Log("- Export instance %s to node %s" % (instance, enode))
325       self.ExecOp(exp_op)
326       Log("- Remove instance %s" % (instance))
327       self.ExecOp(rem_op)
328       self.to_rem.remove(instance)
329       Log("- Import instance %s from node %s to node %s" %
330           (instance, enode, pnode))
331       self.ExecOp(imp_op)
332       Log("- Remove export of instance %s" % (instance))
333       self.ExecOp(erem_op)
334
335       self.to_rem.append(instance)
336
337   def StopStart(self):
338     """Stop/start the instances."""
339     for instance in self.instances:
340       op = opcodes.OpShutdownInstance(instance_name=instance)
341       Log("- Shutdown instance %s" % instance)
342       self.ExecOp(op)
343       op = opcodes.OpStartupInstance(instance_name=instance, force=False)
344       Log("- Start instance %s" % instance)
345       self.ExecOp(op)
346
347   def Remove(self):
348     """Remove the instances."""
349     for instance in self.to_rem:
350       op = opcodes.OpRemoveInstance(instance_name=instance)
351       Log("- Remove instance %s" % instance)
352       self.ExecOp(op)
353
354   def BurninCluster(self):
355     """Test a cluster intensively.
356
357     This will create instances and then start/stop/failover them.
358     It is safe for existing instances but could impact performance.
359
360     """
361
362     opts = self.opts
363
364     Log("- Testing global parameters")
365
366     if (len(self.nodes) == 1 and
367         opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN)):
368       Log("When one node is available/selected the disk template must"
369                " be 'plain' or 'diskless'")
370       sys.exit(1)
371
372     has_err = True
373     try:
374       self.CreateInstances()
375       if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
376         if opts.disk_template == constants.DT_REMOTE_RAID1:
377           self.ReplaceDisks1R1()
378         elif opts.disk_template == constants.DT_DRBD8:
379           self.ReplaceDisks1D8()
380       if (opts.do_replace2 and len(self.nodes) > 2 and
381           opts.disk_template in constants.DTS_NET_MIRROR) :
382         self.ReplaceDisks2()
383
384       if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
385         self.Failover()
386
387       if opts.do_importexport:
388         self.ImportExport()
389
390       if opts.do_startstop:
391         self.StopStart()
392
393       has_err = False
394     finally:
395       if has_err:
396         Log("Error detected: opcode buffer follows:\n\n")
397         Log(self.GetFeedbackBuf())
398         Log("\n\n")
399       self.Remove()
400
401     return 0
402
403
404 def main():
405   """Main function"""
406
407   burner = Burner()
408   try:
409     utils.Lock('cmd', max_retries=15, debug=True)
410   except errors.LockError, err:
411     logger.ToStderr(str(err))
412     return 1
413   try:
414     retval = burner.BurninCluster()
415   finally:
416     utils.Unlock('cmd')
417     utils.LockCleanup()
418   return retval
419
420
if __name__ == "__main__":
  # main() returns a status code (e.g. 1 when the lock cannot be taken);
  # pass it on as the process exit status instead of discarding it
  sys.exit(main())