Introduce a Luxi call for GetTags
tools/burnin
#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""

import os
import sys
import optparse
import time
import socket
import urllib
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import constants
from ganeti import cli
from ganeti import errors
from ganeti import utils


USAGE = "\tburnin -o OS_NAME [options...] instance_name ..."

MAX_RETRIES = 3


class InstanceDown(Exception):
  """The checked instance was not up"""


class BurninFailure(Exception):
  """Failure detected during burning"""


def Usage():
  """Shows program usage information and exits the program."""

  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)


def Log(msg, indent=0):
  """Simple function that prints out its argument.

  """
  headers = {
    0: "- ",
    1: "* ",
    2: ""
    }
  sys.stdout.write("%*s%s%s\n" % (2*indent, "",
                                  headers.get(indent, "  "), msg))
  sys.stdout.flush()
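
# Sample output at each indent level (derived from the headers mapping
# above; instance/node names are illustrative):
#   Log("Creating instances")          -> "- Creating instances"
#   Log("instance inst1", indent=1)    -> "  * instance inst1"
#   Log("on node1, node2", indent=2)   -> "    on node1, node2"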


def Err(msg, exit_code=1):
  """Simple error logging that prints to stderr.

  """
  sys.stderr.write(msg + "\n")
  sys.stderr.flush()
  sys.exit(exit_code)


class SimpleOpener(urllib.FancyURLopener):
  """A simple url opener"""
  # pylint: disable-msg=W0221

  def prompt_user_passwd(self, host, realm, clear_cache=0):
    """No-interaction version of prompt_user_passwd."""
    # we follow the parent class' API
    # pylint: disable-msg=W0613
    return None, None

  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Custom error handling"""
    # make sure sockets are not left in CLOSE_WAIT; this is similar
    # to, but raises a different exception than, the BasicURLOpener class
    _ = fp.read() # throw away data
    fp.close()
    raise InstanceDown("HTTP error returned: code %s, msg %s" %
                       (errcode, errmsg))

OPTIONS = [
  cli.cli_option("-o", "--os", dest="os", default=None,
                 help="OS to use during burnin",
                 metavar="<OS>",
                 completion_suggest=cli.OPT_COMPL_ONE_OS),
  cli.cli_option("--disk-size", dest="disk_size",
                 help="Disk size (determines disk count)",
                 default="128m", type="string", metavar="<size,size,...>",
                 completion_suggest=("128M 512M 1G 4G 1G,256M"
                                     " 4G,1G,1G 10G").split()),
  cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth",
                 default="128m", type="string", metavar="<size,size,...>"),
  cli.cli_option("--mem-size", dest="mem_size", help="Memory size",
                 default=128, type="unit", metavar="<size>",
                 completion_suggest=("128M 256M 512M 1G 4G 8G"
                                     " 12G 16G").split()),
  cli.VERBOSE_OPT,
  cli.NOIPCHECK_OPT,
  cli.NONAMECHECK_OPT,
  cli.cli_option("--no-replace1", dest="do_replace1",
                 help="Skip disk replacement with the same secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-replace2", dest="do_replace2",
                 help="Skip disk replacement with a different secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-failover", dest="do_failover",
                 help="Skip instance failovers", action="store_false",
                 default=True),
  cli.cli_option("--no-migrate", dest="do_migrate",
                 help="Skip instance live migration",
                 action="store_false", default=True),
  cli.cli_option("--no-move", dest="do_move",
                 help="Skip instance moves", action="store_false",
                 default=True),
  cli.cli_option("--no-importexport", dest="do_importexport",
                 help="Skip instance export/import", action="store_false",
                 default=True),
  cli.cli_option("--no-startstop", dest="do_startstop",
                 help="Skip instance stop/start", action="store_false",
                 default=True),
  cli.cli_option("--no-reinstall", dest="do_reinstall",
                 help="Skip instance reinstall", action="store_false",
                 default=True),
  cli.cli_option("--no-reboot", dest="do_reboot",
                 help="Skip instance reboot", action="store_false",
                 default=True),
  cli.cli_option("--no-activate-disks", dest="do_activate_disks",
                 help="Skip disk activation/deactivation",
                 action="store_false", default=True),
  cli.cli_option("--no-add-disks", dest="do_addremove_disks",
                 help="Skip disk addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-add-nics", dest="do_addremove_nics",
                 help="Skip NIC addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-nics", dest="nics",
                 help="No network interfaces", action="store_const",
                 const=[], default=[{}]),
  cli.cli_option("--rename", dest="rename", default=None,
                 help=("Give one unused instance name which is taken"
                       " to start the renaming sequence"),
                 metavar="<instance_name>"),
  cli.cli_option("-t", "--disk-template", dest="disk_template",
                 choices=list(constants.DISK_TEMPLATES),
                 default=constants.DT_DRBD8,
                 help="Disk template (diskless, file, plain or drbd) [drbd]"),
  cli.cli_option("-n", "--nodes", dest="nodes", default="",
                 help=("Comma separated list of nodes to perform"
                       " the burnin on (defaults to all nodes)"),
                 completion_suggest=cli.OPT_COMPL_MANY_NODES),
  cli.cli_option("-I", "--iallocator", dest="iallocator",
                 default=None, type="string",
                 help=("Perform the allocation using an iallocator"
                       " instead of fixed node spread (node restrictions no"
                       " longer apply, therefore -n/--nodes must not be"
                       " used)"),
                 completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR),
  cli.cli_option("-p", "--parallel", default=False, action="store_true",
                 dest="parallel",
                 help=("Enable parallelization of some operations in"
                       " order to speed burnin or to test granular locking")),
  cli.cli_option("--net-timeout", default=15, type="int",
                 dest="net_timeout",
                 help=("The instance check network timeout in seconds"
                       " (defaults to 15 seconds)"),
                 completion_suggest="15 60 300 900".split()),
  cli.cli_option("-C", "--http-check", default=False, action="store_true",
                 dest="http_check",
                 help=("Enable checking of instance status via http,"
                       " looking for /hostname.txt that should contain the"
                       " name of the instance")),
  cli.cli_option("-K", "--keep-instances", default=False,
                 action="store_true",
                 dest="keep_instances",
                 help=("Leave instances on the cluster after burnin,"
                       " for investigation in case of errors or simply"
                       " to use them")),
  ]

# Mainly used for bash completion
ARGUMENTS = [cli.ArgInstance(min=1)]
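
# Typical invocation (OS and instance names here are illustrative):
#   burnin -o debian-etch -p --disk-size=1G,512M inst1.example.com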


def _DoCheckInstances(fn):
  """Decorator for checking instances.

  """
  def wrapper(self, *args, **kwargs):
    val = fn(self, *args, **kwargs)
    for instance in self.instances:
      self._CheckInstanceAlive(instance) # pylint: disable-msg=W0212
    return val

  return wrapper


def _DoBatch(retry):
  """Decorator for possible batch operations.

  Must come after the _DoCheckInstances decorator (if any).

  @param retry: whether this is a retryable batch, will be
      passed to StartBatch

  """
  def wrap(fn):
    def batched(self, *args, **kwargs):
      self.StartBatch(retry)
      val = fn(self, *args, **kwargs)
      self.CommitQueue()
      return val
    return batched

  return wrap
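
# The two decorators compose as on the Burn* methods below: _DoCheckInstances
# must be outermost, so that the instance liveness check only runs once the
# whole batch has been committed:
#
#   @_DoCheckInstances
#   @_DoBatch(False)
#   def BurnSomething(self):
#     ...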


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self.url_opener = SimpleOpener()
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.queued_ops = []
    self.opts = None
    self.queue_retry = False
    self.disk_count = self.disk_growth = self.disk_size = None
    self.hvp = self.bep = None
    self.ParseOptions()
    self.cl = cli.GetClient()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
    formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
    self._feed_buf.write(formatted_msg + "\n")
    if self.opts.verbose:
      Log(formatted_msg, indent=3)

  def MaybeRetry(self, retry_count, msg, fn, *args):
    """Possibly retry a given function execution.

    @type retry_count: int
    @param retry_count: retry counter:
        - 0: non-retryable action
        - 1: last retry for a retryable action
        - MAX_RETRIES: original try for a retryable action
    @type msg: str
    @param msg: the kind of the operation
    @type fn: callable
    @param fn: the function to be called

    """
    try:
      val = fn(*args)
      if retry_count > 0 and retry_count < MAX_RETRIES:
        Log("Idempotent %s succeeded after %d retries" %
            (msg, MAX_RETRIES - retry_count))
      return val
    except Exception, err: # pylint: disable-msg=W0703
      if retry_count == 0:
        Log("Non-idempotent %s failed, aborting" % (msg, ))
        raise
      elif retry_count == 1:
        Log("Idempotent %s repeated failure, aborting" % (msg, ))
        raise
      else:
        Log("Idempotent %s failed, retry #%d/%d: %s" %
            (msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err))
        # propagate the retried call's result back to the caller
        return self.MaybeRetry(retry_count - 1, msg, fn, *args)

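  # E.g. ExecOp(True, op) below expands to MaybeRetry(MAX_RETRIES, "opcode",
  # self._ExecOp, op), i.e. up to MAX_RETRIES attempts in total, while
  # ExecOp(False, op) runs the opcode exactly once.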
  def _ExecOp(self, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    job_id = cli.SendJob(ops, cl=self.cl)
    results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
    if len(ops) == 1:
      return results[0]
    else:
      return results

  def ExecOp(self, retry, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    if retry:
      rval = MAX_RETRIES
    else:
      rval = 0
    return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops)

  def ExecOrQueue(self, name, *ops):
    """Execute an opcode and manage the exec buffer."""
    if self.opts.parallel:
      self.queued_ops.append((ops, name))
    else:
      return self.ExecOp(self.queue_retry, *ops)

  def StartBatch(self, retry):
    """Start a new batch of jobs.

    @param retry: whether this is a retryable batch

    """
    self.queued_ops = []
    self.queue_retry = retry

  def CommitQueue(self):
    """Execute all submitted opcodes in case of parallel burnin."""
    if not self.opts.parallel:
      return

    if self.queue_retry:
      rval = MAX_RETRIES
    else:
      rval = 0

    try:
      results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
                                self.queued_ops)
    finally:
      self.queued_ops = []
    return results
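
  # In parallel mode (-p), ExecOrQueue only queues (ops, name) tuples; the
  # @_DoBatch decorator then drains the queue via CommitQueue/ExecJobSet,
  # submitting one job per instance and polling them all:
  #
  #   self.StartBatch(False)          # reset the queue
  #   self.ExecOrQueue(instance, op)  # queued, not yet executed
  #   self.CommitQueue()              # submit and wait for all queued jobs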

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results if all jobs are
    successful; otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
    job_ids = [cli.SendJob(row[0], cl=self.cl) for row in jobs]
    Log("Submitted job ID(s) %s" % utils.CommaJoin(job_ids), indent=1)
    results = []
    for jid, (_, iname) in zip(job_ids, jobs):
      Log("waiting for job %s for %s" % (jid, iname), indent=2)
      try:
        results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
      except Exception, err: # pylint: disable-msg=W0703
        Log("Job for %s failed: %s" % (iname, err))
    if len(results) != len(jobs):
      raise BurninFailure()
    return results

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version=("%%prog (ganeti) %s" %
                                            constants.RELEASE_VERSION),
                                   option_list=OPTIONS)

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)

    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
      options.do_addremove_disks = False
    else:
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Err("Wrong disk sizes/growth combination")
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Err("Wrong disk count/disk template combination")

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Err("Give either the nodes option or the iallocator option, not both")

    if options.http_check and not options.name_check:
      Err("Can't enable HTTP checks without name checks")

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

    socket.setdefaulttimeout(options.net_timeout)

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name", "offline", "drained"],
                                names=names, use_locking=True)
      result = self.ExecOp(True, op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Err(msg, exit_code=err_code)
    self.nodes = [data[0] for data in result if not (data[1] or data[2])]

    op_diagnose = opcodes.OpDiagnoseOS(output_fields=["name", "valid",
                                                      "variants"], names=[])
    result = self.ExecOp(True, op_diagnose)

    if not result:
      Err("Can't get the OS list")

    found = False
    for (name, valid, variants) in result:
      if valid and self.opts.os in cli.CalculateOSNames(name, variants):
        found = True
        break

    if not found:
      Err("OS '%s' not found" % self.opts.os)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnCreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)

    Log("Creating instances")
    for pnode, snode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        pnode = snode = None
        msg = "with iallocator %s" % self.opts.iallocator
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        msg = "on %s" % pnode
      else:
        msg = "on %s, %s" % (pnode, snode)

      Log(msg, indent=2)

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks=[{"size": size}
                                           for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=self.opts.ip_check,
                                    name_check=self.opts.name_check,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      self.ExecOrQueue(instance, op)
      self.to_rem.append(instance)

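  # The izip(cycle(nodes), islice(cycle(nodes), 1, None), ...) idiom above
  # pairs each instance with consecutive (pnode, snode) tuples, rotating
  # through the node list so that primaries and secondaries are spread
  # evenly: with nodes [A, B, C] the pairs are (A, B), (B, C), (C, A), ...
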
  @_DoBatch(False)
  def BurnGrowDisks(self):
    """Grow the instances' disks by the requested amount, if any."""
    Log("Growing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      for idx, growth in enumerate(self.disk_growth):
        if growth > 0:
          op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                  amount=growth, wait_for_sync=True)
          Log("increase disk/%s by %s MB" % (idx, growth), indent=2)
          self.ExecOrQueue(instance, op)

  @_DoBatch(True)
  def BurnReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    Log("Replacing disks on the same nodes")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      ops = []
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=range(self.disk_count))
        Log("run %s" % mode, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops) # pylint: disable-msg=W0142

  @_DoBatch(True)
  def BurnReplaceDisks2(self):
    """Replace secondary node."""
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG

    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        tnode = None
        msg = "with iallocator %s" % self.opts.iallocator
      else:
        msg = tnode
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[])
      Log("run %s %s" % (mode, msg), indent=2)
      self.ExecOrQueue(instance, op)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnFailover(self):
    """Failover the instances."""
    Log("Failing over instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)
      self.ExecOrQueue(instance, op)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnMove(self):
    """Move the instances."""
    Log("Moving instances")
    mytor = izip(islice(cycle(self.nodes), 1, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpMoveInstance(instance_name=instance,
                                  target_node=tnode)
      self.ExecOrQueue(instance, op)

  @_DoBatch(False)
  def BurnMigrate(self):
    """Migrate the instances."""
    Log("Migrating instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = opcodes.OpMigrateInstance(instance_name=instance, live=True,
                                      cleanup=False)

      op2 = opcodes.OpMigrateInstance(instance_name=instance, live=True,
                                      cleanup=True)
      Log("migration and migration cleanup", indent=2)
      self.ExecOrQueue(instance, op1, op2)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    Log("Exporting and re-importing instances")
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      # read the full name of the instance
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance], use_locking=True)
      full_name = self.ExecOp(False, nam_op)[0][0]

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("import from %s"
                          " with iallocator %s" %
                          (enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("import from %s to %s" %
                          (enode, pnode))
      else:
        import_log_msg = ("import from %s to %s, %s" %
                          (enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks=[{"size": size}
                                               for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=self.opts.ip_check,
                                        name_check=self.opts.name_check,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("export to node %s" % enode, indent=2)
      Log("remove instance", indent=2)
      Log(import_log_msg, indent=2)
      Log("remove export", indent=2)
      self.ExecOrQueue(instance, exp_op, rem_op, imp_op, erem_op)

  @staticmethod
  def StopInstanceOp(instance):
    """Stop given instance."""
    return opcodes.OpShutdownInstance(instance_name=instance)

  @staticmethod
  def StartInstanceOp(instance):
    """Start given instance."""
    return opcodes.OpStartupInstance(instance_name=instance, force=False)

  @staticmethod
  def RenameInstanceOp(instance, instance_new):
    """Rename instance."""
    return opcodes.OpRenameInstance(instance_name=instance,
                                    new_name=instance_new)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnStopStart(self):
    """Stop/start the instances."""
    Log("Stopping and starting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, op1, op2)

  @_DoBatch(False)
  def BurnRemove(self):
    """Remove the instances."""
    Log("Removing instances")
    for instance in self.to_rem:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      self.ExecOrQueue(instance, op)

  def BurnRename(self):
    """Rename the instances.

    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
    Log("Renaming instances")
    rename = self.opts.rename
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_stop1 = self.StopInstanceOp(instance)
      op_stop2 = self.StopInstanceOp(rename)
      op_rename1 = self.RenameInstanceOp(instance, rename)
      op_rename2 = self.RenameInstanceOp(rename, instance)
      op_start1 = self.StartInstanceOp(rename)
      op_start2 = self.StartInstanceOp(instance)
      self.ExecOp(False, op_stop1, op_rename1, op_start1)
      self._CheckInstanceAlive(rename)
      self.ExecOp(False, op_stop2, op_rename2, op_start2)
      self._CheckInstanceAlive(instance)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReinstall(self):
    """Reinstall the instances."""
    Log("Reinstalling instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = opcodes.OpReinstallInstance(instance_name=instance)
      Log("reinstall without passing the OS", indent=2)
      op3 = opcodes.OpReinstallInstance(instance_name=instance,
                                        os_type=self.opts.os)
      Log("reinstall specifying the OS", indent=2)
      op4 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, op1, op2, op3, op4)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReboot(self):
    """Reboot the instances."""
    Log("Rebooting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      ops = []
      for reboot_type in constants.REBOOT_TYPES:
        op = opcodes.OpRebootInstance(instance_name=instance,
                                      reboot_type=reboot_type,
                                      ignore_secondaries=False)
        Log("reboot with type '%s'" % reboot_type, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops) # pylint: disable-msg=W0142

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnActivateDisks(self):
    """Activate and deactivate disks of the instances."""
    Log("Activating/deactivating disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_start = self.StartInstanceOp(instance)
      op_act = opcodes.OpActivateInstanceDisks(instance_name=instance)
      op_deact = opcodes.OpDeactivateInstanceDisks(instance_name=instance)
      op_stop = self.StopInstanceOp(instance)
      Log("activate disks when online", indent=2)
      Log("activate disks when offline", indent=2)
      Log("deactivate disks (when offline)", indent=2)
      self.ExecOrQueue(instance, op_act, op_stop, op_act, op_deact, op_start)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnAddRemoveDisks(self):
    """Add and remove an extra disk for the instances."""
    Log("Adding and removing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_add = opcodes.OpSetInstanceParams(
        instance_name=instance,
        disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
      op_rem = opcodes.OpSetInstanceParams(
        instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
      op_stop = self.StopInstanceOp(instance)
      op_start = self.StartInstanceOp(instance)
      Log("adding a disk", indent=2)
      Log("removing last disk", indent=2)
      self.ExecOrQueue(instance, op_add, op_stop, op_rem, op_start)

  @_DoBatch(False)
  def BurnAddRemoveNICs(self):
    """Add and remove an extra NIC for the instances."""
    Log("Adding and removing NICs")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_add = opcodes.OpSetInstanceParams(
        instance_name=instance, nics=[(constants.DDM_ADD, {})])
      op_rem = opcodes.OpSetInstanceParams(
        instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
      Log("adding a NIC", indent=2)
      Log("removing last NIC", indent=2)
      self.ExecOrQueue(instance, op_add, op_rem)

  def _CheckInstanceAlive(self, instance):
    """Check if an instance is alive by doing HTTP checks.

    This will try to retrieve the URL /hostname.txt on the instance
    and check that it contains the hostname of the instance. In case
    we get ECONNREFUSED, we retry for up to the net timeout seconds;
    for any other error we abort.

    """
    if not self.opts.http_check:
      return
    end_time = time.time() + self.opts.net_timeout
    url = None
    while time.time() < end_time and url is None:
      try:
        url = self.url_opener.open("http://%s/hostname.txt" % instance)
      except IOError:
        # here we can have connection refused, no route to host, etc.
        time.sleep(1)
    if url is None:
      raise InstanceDown(instance, "Cannot contact instance")
    hostname = url.read().strip()
    url.close()
    if hostname != instance:
      raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
                                    (instance, hostname)))

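  # Note: this assumes the burnin OS image runs an HTTP server on port 80
  # that serves /hostname.txt containing the instance's own hostname;
  # without such an image, leave -C/--http-check off.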
  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Err("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")

    has_err = True
    try:
      self.BurnCreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.BurnReplaceDisks2()

      if (opts.disk_template != constants.DT_DISKLESS and
          utils.any(self.disk_growth, lambda n: n > 0)):
        self.BurnGrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnFailover()

      if opts.do_migrate and opts.disk_template == constants.DT_DRBD8:
        self.BurnMigrate()

      if (opts.do_move and len(self.nodes) > 1 and
          opts.disk_template in [constants.DT_PLAIN, constants.DT_FILE]):
        self.BurnMove()

      if (opts.do_importexport and
          opts.disk_template not in (constants.DT_DISKLESS,
                                     constants.DT_FILE)):
        self.BurnImportExport()

      if opts.do_reinstall:
        self.BurnReinstall()

      if opts.do_reboot:
        self.BurnReboot()

      if opts.do_addremove_disks:
        self.BurnAddRemoveDisks()

      if opts.do_addremove_nics:
        self.BurnAddRemoveNICs()

      if opts.do_activate_disks:
        self.BurnActivateDisks()

      if opts.rename:
        self.BurnRename()

      if opts.do_startstop:
        self.BurnStopStart()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      if not self.opts.keep_instances:
        try:
          self.BurnRemove()
        except Exception, err:  # pylint: disable-msg=W0703
          if has_err: # already detected errors, so errors in removal
                      # are quite expected
            Log("Note: error detected during instance remove: %s" % str(err))
          else: # non-expected error
            raise

    return 0


def main():
  """Main function"""

  burner = Burner()
  return burner.BurninCluster()


if __name__ == "__main__":
  # propagate the burnin result as the process exit code
  sys.exit(main())