Remove superfluous warnings in HooksRunner
#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""

import sys
import optparse
import time
import socket
import urllib
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import constants
from ganeti import cli
from ganeti import errors
from ganeti import utils


USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
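
# Example invocation (OS and instance names are hypothetical):
#
#   burnin -o debian-etch -p --disk-size=1G,512M \
#     inst1.example.com inst2.example.com
#
# This would create two instances with two disks each and run the whole
# battery of tests against them, submitting the jobs in parallel.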

MAX_RETRIES = 3


class InstanceDown(Exception):
  """The checked instance was not up"""


class BurninFailure(Exception):
  """Failure detected during burning"""


def Usage():
  """Shows program usage information and exits the program."""

  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)


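# Log() indents each message by two spaces per level and prefixes it
# with "- " at level 0, "* " at level 1 and nothing at level 2; deeper
# levels get a plain two-space pad.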
def Log(msg, indent=0):
  """Simple function that prints out its argument.

  """
  headers = {
    0: "- ",
    1: "* ",
    2: ""
    }
  sys.stdout.write("%*s%s%s\n" % (2*indent, "",
                                   headers.get(indent, "  "), msg))
  sys.stdout.flush()


def Err(msg, exit_code=1):
  """Simple error logging that prints to stderr.

  """
  sys.stderr.write(msg + "\n")
  sys.stderr.flush()
  sys.exit(exit_code)


class SimpleOpener(urllib.FancyURLopener):
  """A simple url opener"""
  # pylint: disable-msg=W0221

  def prompt_user_passwd(self, host, realm, clear_cache=0):
    """No-interaction version of prompt_user_passwd."""
    # we follow parent class' API
    # pylint: disable-msg=W0613
    return None, None

  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Custom error handling"""
    # make sure sockets are not left in CLOSE_WAIT, this is similar
    # but with a different exception to the BasicURLOpener class
    _ = fp.read() # throw away data
    fp.close()
    raise InstanceDown("HTTP error returned: code %s, msg %s" %
                       (errcode, errmsg))


OPTIONS = [
  cli.cli_option("-o", "--os", dest="os", default=None,
                 help="OS to use during burnin",
                 metavar="<OS>",
                 completion_suggest=cli.OPT_COMPL_ONE_OS),
  cli.cli_option("--disk-size", dest="disk_size",
                 help="Disk size (determines disk count)",
                 default="128m", type="string", metavar="<size,size,...>",
                 completion_suggest=("128M 512M 1G 4G 1G,256M"
                                     " 4G,1G,1G 10G").split()),
  cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth",
                 default="128m", type="string", metavar="<size,size,...>"),
  cli.cli_option("--mem-size", dest="mem_size", help="Memory size",
                 default=128, type="unit", metavar="<size>",
                 completion_suggest=("128M 256M 512M 1G 4G 8G"
                                     " 12G 16G").split()),
  cli.DEBUG_OPT,
  cli.VERBOSE_OPT,
  cli.NOIPCHECK_OPT,
  cli.NONAMECHECK_OPT,
  cli.EARLY_RELEASE_OPT,
  cli.cli_option("--no-replace1", dest="do_replace1",
                 help="Skip disk replacement with the same secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-replace2", dest="do_replace2",
                 help="Skip disk replacement with a different secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-failover", dest="do_failover",
                 help="Skip instance failovers", action="store_false",
                 default=True),
  cli.cli_option("--no-migrate", dest="do_migrate",
                 help="Skip instance live migration",
                 action="store_false", default=True),
  cli.cli_option("--no-move", dest="do_move",
                 help="Skip instance moves", action="store_false",
                 default=True),
  cli.cli_option("--no-importexport", dest="do_importexport",
                 help="Skip instance export/import", action="store_false",
                 default=True),
  cli.cli_option("--no-startstop", dest="do_startstop",
                 help="Skip instance stop/start", action="store_false",
                 default=True),
  cli.cli_option("--no-reinstall", dest="do_reinstall",
                 help="Skip instance reinstall", action="store_false",
                 default=True),
  cli.cli_option("--no-reboot", dest="do_reboot",
                 help="Skip instance reboot", action="store_false",
                 default=True),
  cli.cli_option("--no-activate-disks", dest="do_activate_disks",
                 help="Skip disk activation/deactivation",
                 action="store_false", default=True),
  cli.cli_option("--no-add-disks", dest="do_addremove_disks",
                 help="Skip disk addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-add-nics", dest="do_addremove_nics",
                 help="Skip NIC addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-nics", dest="nics",
                 help="No network interfaces", action="store_const",
                 const=[], default=[{}]),
  cli.cli_option("--rename", dest="rename", default=None,
                 help=("Give one unused instance name which is taken"
                       " to start the renaming sequence"),
                 metavar="<instance_name>"),
  cli.cli_option("-t", "--disk-template", dest="disk_template",
                 choices=list(constants.DISK_TEMPLATES),
                 default=constants.DT_DRBD8,
                 help="Disk template (diskless, file, plain or drbd) [drbd]"),
  cli.cli_option("-n", "--nodes", dest="nodes", default="",
                 help=("Comma separated list of nodes to perform"
                       " the burnin on (defaults to all nodes)"),
                 completion_suggest=cli.OPT_COMPL_MANY_NODES),
  cli.cli_option("-I", "--iallocator", dest="iallocator",
                 default=None, type="string",
                 help=("Perform the allocation using an iallocator"
                       " instead of fixed node spread (node restrictions no"
                       " longer apply, therefore -n/--nodes must not be"
                       " used)"),
                 completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR),
  cli.cli_option("-p", "--parallel", default=False, action="store_true",
                 dest="parallel",
                 help=("Enable parallelization of some operations in"
                       " order to speed burnin or to test granular locking")),
  cli.cli_option("--net-timeout", default=15, type="int",
                 dest="net_timeout",
                 help=("The instance check network timeout in seconds"
                       " (defaults to 15 seconds)"),
                 completion_suggest="15 60 300 900".split()),
  cli.cli_option("-C", "--http-check", default=False, action="store_true",
                 dest="http_check",
                 help=("Enable checking of instance status via http,"
                       " looking for /hostname.txt that should contain the"
                       " name of the instance")),
  cli.cli_option("-K", "--keep-instances", default=False,
                 action="store_true",
                 dest="keep_instances",
                 help=("Leave instances on the cluster after burnin,"
                       " for investigation in case of errors or simply"
                       " to use them")),
  ]

# Mainly used for bash completion
ARGUMENTS = [cli.ArgInstance(min=1)]


def _DoCheckInstances(fn):
  """Decorator for checking instances.

  """
  def wrapper(self, *args, **kwargs):
    val = fn(self, *args, **kwargs)
    for instance in self.instances:
      self._CheckInstanceAlive(instance) # pylint: disable-msg=W0212
    return val

  return wrapper


def _DoBatch(retry):
  """Decorator for possible batch operations.

  Must come after the _DoCheckInstances decorator (if any).

  @param retry: whether this is a retryable batch, will be
      passed to StartBatch

  """
  def wrap(fn):
    def batched(self, *args, **kwargs):
      self.StartBatch(retry)
      val = fn(self, *args, **kwargs)
      self.CommitQueue()
      return val
    return batched

  return wrap
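
# The two decorators are meant to be stacked with _DoCheckInstances on
# the outside, for example:
#
#   @_DoCheckInstances
#   @_DoBatch(False)
#   def BurnSomething(self):
#     ...
#
# so that the batch is committed before the liveness check runs on the
# final state of the instances.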


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self.url_opener = SimpleOpener()
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.queued_ops = []
    self.opts = None
    self.queue_retry = False
    self.disk_count = self.disk_growth = self.disk_size = None
    self.hvp = self.bep = None
    self.ParseOptions()
    self.cl = cli.GetClient()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
    formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
    self._feed_buf.write(formatted_msg + "\n")
    if self.opts.verbose:
      Log(formatted_msg, indent=3)

  def MaybeRetry(self, retry_count, msg, fn, *args):
    """Possibly retry a given function execution.

    @type retry_count: int
    @param retry_count: retry counter:
        - 0: non-retryable action
        - 1: last retry for a retryable action
        - MAX_RETRIES: original try for a retryable action
    @type msg: str
    @param msg: the kind of the operation
    @type fn: callable
    @param fn: the function to be called

    """
    try:
      val = fn(*args)
      if retry_count > 0 and retry_count < MAX_RETRIES:
        Log("Idempotent %s succeeded after %d retries" %
            (msg, MAX_RETRIES - retry_count))
      return val
    except Exception, err: # pylint: disable-msg=W0703
      if retry_count == 0:
        Log("Non-idempotent %s failed, aborting" % (msg, ))
        raise
      elif retry_count == 1:
        Log("Idempotent %s repeated failure, aborting" % (msg, ))
        raise
      else:
        Log("Idempotent %s failed, retry #%d/%d: %s" %
            (msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err))
        # propagate the retry's result, otherwise a successful retry
        # would return None to the caller
        return self.MaybeRetry(retry_count - 1, msg, fn, *args)

  def _SetDebug(self, ops):
    """Set the debug value on the given opcodes"""
    for op in ops:
      op.debug_level = self.opts.debug

  def _ExecOp(self, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    job_id = cli.SendJob(ops, cl=self.cl)
    results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
    if len(ops) == 1:
      return results[0]
    else:
      return results

  def ExecOp(self, retry, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    if retry:
      rval = MAX_RETRIES
    else:
      rval = 0
    self._SetDebug(ops)
    return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops)

  def ExecOrQueue(self, name, *ops):
    """Execute an opcode and manage the exec buffer."""
    if self.opts.parallel:
      self._SetDebug(ops)
      self.queued_ops.append((ops, name))
    else:
      return self.ExecOp(self.queue_retry, *ops)

  def StartBatch(self, retry):
    """Start a new batch of jobs.

    @param retry: whether this is a retryable batch

    """
    self.queued_ops = []
    self.queue_retry = retry

  def CommitQueue(self):
    """Execute all submitted opcodes in case of parallel burnin"""
    if not self.opts.parallel:
      return

    if self.queue_retry:
      rval = MAX_RETRIES
    else:
      rval = 0

    try:
      results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
                                self.queued_ops)
    finally:
      self.queued_ops = []
    return results

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
    job_ids = [cli.SendJob(row[0], cl=self.cl) for row in jobs]
    Log("Submitted job ID(s) %s" % utils.CommaJoin(job_ids), indent=1)
    results = []
    for jid, (_, iname) in zip(job_ids, jobs):
      Log("waiting for job %s for %s" % (jid, iname), indent=2)
      try:
        results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
      except Exception, err: # pylint: disable-msg=W0703
        Log("Job for %s failed: %s" % (iname, err))
    if len(results) != len(jobs):
      raise BurninFailure()
    return results

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version=("%%prog (ganeti) %s" %
                                            constants.RELEASE_VERSION),
                                   option_list=OPTIONS)

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)

    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
      options.do_addremove_disks = False
    else:
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Err("Wrong disk sizes/growth combination")
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Err("Wrong disk count/disk template combination")

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Err("Give either the nodes option or the iallocator option, not both")

    if options.http_check and not options.name_check:
      Err("Can't enable HTTP checks without name checks")

    self.opts = options
    self.instances = args
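    # bep/hvp are the backend and hypervisor parameter dictionaries
    # handed to the creation opcodes as beparams/hvparams; burnin pins
    # only the memory size and VCPU count and leaves the hypervisor
    # parameters at their cluster defaults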
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

    socket.setdefaulttimeout(options.net_timeout)

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name", "offline", "drained"],
                                names=names, use_locking=True)
      result = self.ExecOp(True, op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Err(msg, exit_code=err_code)
    self.nodes = [data[0] for data in result if not (data[1] or data[2])]

    op_diagnose = opcodes.OpDiagnoseOS(output_fields=["name", "valid",
                                                      "variants"], names=[])
    result = self.ExecOp(True, op_diagnose)

    if not result:
      Err("Can't get the OS list")

    found = False
    for (name, valid, variants) in result:
      if valid and self.opts.os in cli.CalculateOSNames(name, variants):
        found = True
        break

    if not found:
      Err("OS '%s' not found" % self.opts.os)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnCreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
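    # walk the node list as a ring: each instance gets one node as
    # primary and the next one as secondary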
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)

    Log("Creating instances")
    for pnode, snode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        pnode = snode = None
        msg = "with iallocator %s" % self.opts.iallocator
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        msg = "on %s" % pnode
      else:
        msg = "on %s, %s" % (pnode, snode)

      Log(msg, indent=2)

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks=[{"size": size}
                                           for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=self.opts.ip_check,
                                    name_check=self.opts.name_check,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      self.ExecOrQueue(instance, op)
      self.to_rem.append(instance)

  @_DoBatch(False)
  def BurnGrowDisks(self):
    """Grow all the instance disks by the requested amounts, if any."""
    Log("Growing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      for idx, growth in enumerate(self.disk_growth):
        if growth > 0:
          op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                  amount=growth, wait_for_sync=True)
          Log("increase disk/%s by %s MB" % (idx, growth), indent=2)
          self.ExecOrQueue(instance, op)

  @_DoBatch(True)
  def BurnReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    Log("Replacing disks on the same nodes")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      ops = []
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=range(self.disk_count),
                                    early_release=self.opts.early_release)
        Log("run %s" % mode, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops) # pylint: disable-msg=W0142

  @_DoBatch(True)
  def BurnReplaceDisks2(self):
    """Replace secondary node."""
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG

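    # pick the new secondary two steps ahead in the node ring, so that
    # it differs from both the primary and the current secondary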
    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        tnode = None
        msg = "with iallocator %s" % self.opts.iallocator
      else:
        msg = tnode
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[],
                                  early_release=self.opts.early_release)
      Log("run %s %s" % (mode, msg), indent=2)
      self.ExecOrQueue(instance, op)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnFailover(self):
    """Failover the instances."""
    Log("Failing over instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)
      self.ExecOrQueue(instance, op)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnMove(self):
    """Move the instances."""
    Log("Moving instances")
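    # move each instance to the next node in the node ring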
    mytor = izip(islice(cycle(self.nodes), 1, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpMoveInstance(instance_name=instance,
                                  target_node=tnode)
      self.ExecOrQueue(instance, op)

  @_DoBatch(False)
  def BurnMigrate(self):
    """Migrate the instances."""
    Log("Migrating instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = opcodes.OpMigrateInstance(instance_name=instance, live=True,
                                      cleanup=False)

      op2 = opcodes.OpMigrateInstance(instance_name=instance, live=True,
                                      cleanup=True)
      Log("migration and migration cleanup", indent=2)
      self.ExecOrQueue(instance, op1, op2)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    Log("Exporting and re-importing instances")
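    # take three consecutive nodes from the node ring per instance: the
    # new primary, the new secondary and the export node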
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      # read the full name of the instance
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance], use_locking=True)
      full_name = self.ExecOp(False, nam_op)[0][0]

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("import from %s"
                          " with iallocator %s" %
                          (enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("import from %s to %s" %
                          (enode, pnode))
      else:
        import_log_msg = ("import from %s to %s, %s" %
                          (enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      imp_dir = utils.PathJoin(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks=[{"size": size}
                                               for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=self.opts.ip_check,
                                        name_check=self.opts.name_check,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("export to node %s" % enode, indent=2)
      Log("remove instance", indent=2)
      Log(import_log_msg, indent=2)
      Log("remove export", indent=2)
      self.ExecOrQueue(instance, exp_op, rem_op, imp_op, erem_op)

  @staticmethod
  def StopInstanceOp(instance):
    """Stop given instance."""
    return opcodes.OpShutdownInstance(instance_name=instance)

  @staticmethod
  def StartInstanceOp(instance):
    """Start given instance."""
    return opcodes.OpStartupInstance(instance_name=instance, force=False)

  @staticmethod
  def RenameInstanceOp(instance, instance_new):
    """Rename instance."""
    return opcodes.OpRenameInstance(instance_name=instance,
                                    new_name=instance_new)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnStopStart(self):
    """Stop/start the instances."""
    Log("Stopping and starting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, op1, op2)

  @_DoBatch(False)
  def BurnRemove(self):
    """Remove the instances."""
    Log("Removing instances")
    for instance in self.to_rem:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      self.ExecOrQueue(instance, op)

  def BurnRename(self):
    """Rename the instances.

    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
    Log("Renaming instances")
    rename = self.opts.rename
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_stop1 = self.StopInstanceOp(instance)
      op_stop2 = self.StopInstanceOp(rename)
      op_rename1 = self.RenameInstanceOp(instance, rename)
      op_rename2 = self.RenameInstanceOp(rename, instance)
      op_start1 = self.StartInstanceOp(rename)
      op_start2 = self.StartInstanceOp(instance)
      self.ExecOp(False, op_stop1, op_rename1, op_start1)
      self._CheckInstanceAlive(rename)
      self.ExecOp(False, op_stop2, op_rename2, op_start2)
      self._CheckInstanceAlive(instance)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReinstall(self):
    """Reinstall the instances."""
    Log("Reinstalling instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = opcodes.OpReinstallInstance(instance_name=instance)
      Log("reinstall without passing the OS", indent=2)
      op3 = opcodes.OpReinstallInstance(instance_name=instance,
                                        os_type=self.opts.os)
      Log("reinstall specifying the OS", indent=2)
      op4 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, op1, op2, op3, op4)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReboot(self):
    """Reboot the instances."""
    Log("Rebooting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      ops = []
      for reboot_type in constants.REBOOT_TYPES:
        op = opcodes.OpRebootInstance(instance_name=instance,
                                      reboot_type=reboot_type,
                                      ignore_secondaries=False)
        Log("reboot with type '%s'" % reboot_type, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops) # pylint: disable-msg=W0142

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnActivateDisks(self):
    """Activate and deactivate disks of the instances."""
    Log("Activating/deactivating disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_start = self.StartInstanceOp(instance)
      op_act = opcodes.OpActivateInstanceDisks(instance_name=instance)
      op_deact = opcodes.OpDeactivateInstanceDisks(instance_name=instance)
      op_stop = self.StopInstanceOp(instance)
      Log("activate disks when online", indent=2)
      Log("activate disks when offline", indent=2)
      Log("deactivate disks (when offline)", indent=2)
      self.ExecOrQueue(instance, op_act, op_stop, op_act, op_deact, op_start)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnAddRemoveDisks(self):
    """Add and remove an extra disk for the instances."""
    Log("Adding and removing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_add = opcodes.OpSetInstanceParams(\
        instance_name=instance,
        disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
      op_rem = opcodes.OpSetInstanceParams(\
        instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
      op_stop = self.StopInstanceOp(instance)
      op_start = self.StartInstanceOp(instance)
      Log("adding a disk", indent=2)
      Log("removing last disk", indent=2)
      self.ExecOrQueue(instance, op_add, op_stop, op_rem, op_start)

  @_DoBatch(False)
  def BurnAddRemoveNICs(self):
    """Add and remove an extra NIC for the instances."""
    Log("Adding and removing NICs")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_add = opcodes.OpSetInstanceParams(\
        instance_name=instance, nics=[(constants.DDM_ADD, {})])
      op_rem = opcodes.OpSetInstanceParams(\
        instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
      Log("adding a NIC", indent=2)
      Log("removing last NIC", indent=2)
      self.ExecOrQueue(instance, op_add, op_rem)

  def _CheckInstanceAlive(self, instance):
    """Check if an instance is alive by doing http checks.

    This will try to retrieve the URL /hostname.txt on the instance
    and check that it contains the hostname of the instance. On
    connection errors (e.g. ECONNREFUSED) we keep retrying until the
    network timeout expires; if the instance is still unreachable by
    then, we give up.

    """
    if not self.opts.http_check:
      return
    end_time = time.time() + self.opts.net_timeout
    url = None
    while time.time() < end_time and url is None:
      try:
        url = self.url_opener.open("http://%s/hostname.txt" % instance)
      except IOError:
        # here we can have connection refused, no route to host, etc.
        time.sleep(1)
    if url is None:
      raise InstanceDown(instance, "Cannot contact instance")
    hostname = url.read().strip()
    url.close()
    if hostname != instance:
      raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
                                    (instance, hostname)))

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Err("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")

    has_err = True
    try:
      self.BurnCreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.BurnReplaceDisks2()

      if (opts.disk_template != constants.DT_DISKLESS and
          utils.any(self.disk_growth, lambda n: n > 0)):
        self.BurnGrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnFailover()

      if opts.do_migrate and opts.disk_template == constants.DT_DRBD8:
        self.BurnMigrate()

      if (opts.do_move and len(self.nodes) > 1 and
          opts.disk_template in [constants.DT_PLAIN, constants.DT_FILE]):
        self.BurnMove()

      if (opts.do_importexport and
          opts.disk_template not in (constants.DT_DISKLESS,
                                     constants.DT_FILE)):
        self.BurnImportExport()

      if opts.do_reinstall:
        self.BurnReinstall()

      if opts.do_reboot:
        self.BurnReboot()

      if opts.do_addremove_disks:
        self.BurnAddRemoveDisks()

      if opts.do_addremove_nics:
        self.BurnAddRemoveNICs()

      if opts.do_activate_disks:
        self.BurnActivateDisks()

      if opts.rename:
        self.BurnRename()

      if opts.do_startstop:
        self.BurnStopStart()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      if not self.opts.keep_instances:
        try:
          self.BurnRemove()
        except Exception, err:  # pylint: disable-msg=W0703
          if has_err: # already detected errors, so errors in removal
                      # are quite expected
            Log("Note: error detected during instance remove: %s" % str(err))
          else: # non-expected error
            raise

    return 0


def main():
  """Main function"""

  burner = Burner()
  return burner.BurninCluster()


if __name__ == "__main__":
  sys.exit(main())