tools/burnin @ e7beaa02

#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""

import os
import sys
import optparse
import time
import socket
import urllib
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import constants
from ganeti import cli
from ganeti import errors
from ganeti import utils


USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")

MAX_RETRIES = 3

class InstanceDown(Exception):
  """The checked instance was not up"""


class BurninFailure(Exception):
  """Failure detected during burning"""


def Usage():
  """Shows program usage information and exits the program."""

  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)


def Log(msg, indent=0):
  """Simple function that prints out its argument.

  """
  headers = {
    0: "- ",
    1: "* ",
    2: ""
    }
  sys.stdout.write("%*s%s%s\n" % (2*indent, "",
                                   headers.get(indent, "  "), msg))
  sys.stdout.flush()
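
# For example, Log("Creating instances") prints "- Creating instances",
# Log("instance inst1", indent=1) prints "  * instance inst1", and indent
# levels deeper than the header map fall back to a plain two-space header
# on top of the usual two spaces per indent level.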

def Err(msg, exit_code=1):
  """Simple error logging that prints to stderr.

  """
  sys.stderr.write(msg + "\n")
  sys.stderr.flush()
  sys.exit(exit_code)


class SimpleOpener(urllib.FancyURLopener):
  """A simple url opener"""

  def prompt_user_passwd(self, host, realm, clear_cache=0):
    """No-interaction version of prompt_user_passwd."""
    return None, None

  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Custom error handling"""
    # make sure sockets are not left in CLOSE_WAIT, this is similar
    # but with a different exception to the BasicURLOpener class
    _ = fp.read() # throw away data
    fp.close()
    raise InstanceDown("HTTP error returned: code %s, msg %s" %
                       (errcode, errmsg))


OPTIONS = [
  cli.cli_option("-o", "--os", dest="os", default=None,
                 help="OS to use during burnin",
                 metavar="<OS>",
                 completion_suggest=cli.OPT_COMPL_ONE_OS),
  cli.cli_option("--disk-size", dest="disk_size",
                 help="Disk size (determines disk count)",
                 default="128m", type="string", metavar="<size,size,...>",
                 completion_suggest=("128M 512M 1G 4G 1G,256M"
                                     " 4G,1G,1G 10G").split()),
  cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth",
                 default="128m", type="string", metavar="<size,size,...>"),
  cli.cli_option("--mem-size", dest="mem_size", help="Memory size",
                 default=128, type="unit", metavar="<size>",
                 completion_suggest=("128M 256M 512M 1G 4G 8G"
                                     " 12G 16G").split()),
  cli.VERBOSE_OPT,
  cli.NOIPCHECK_OPT,
  cli.NONAMECHECK_OPT,
  cli.cli_option("--no-replace1", dest="do_replace1",
                 help="Skip disk replacement with the same secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-replace2", dest="do_replace2",
                 help="Skip disk replacement with a different secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-failover", dest="do_failover",
                 help="Skip instance failovers", action="store_false",
                 default=True),
  cli.cli_option("--no-migrate", dest="do_migrate",
                 help="Skip instance live migration",
                 action="store_false", default=True),
  cli.cli_option("--no-move", dest="do_move",
                 help="Skip instance moves", action="store_false",
                 default=True),
  cli.cli_option("--no-importexport", dest="do_importexport",
                 help="Skip instance export/import", action="store_false",
                 default=True),
  cli.cli_option("--no-startstop", dest="do_startstop",
                 help="Skip instance stop/start", action="store_false",
                 default=True),
  cli.cli_option("--no-reinstall", dest="do_reinstall",
                 help="Skip instance reinstall", action="store_false",
                 default=True),
  cli.cli_option("--no-reboot", dest="do_reboot",
                 help="Skip instance reboot", action="store_false",
                 default=True),
  cli.cli_option("--no-activate-disks", dest="do_activate_disks",
                 help="Skip disk activation/deactivation",
                 action="store_false", default=True),
  cli.cli_option("--no-add-disks", dest="do_addremove_disks",
                 help="Skip disk addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-add-nics", dest="do_addremove_nics",
                 help="Skip NIC addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-nics", dest="nics",
                 help="No network interfaces", action="store_const",
                 const=[], default=[{}]),
  cli.cli_option("--rename", dest="rename", default=None,
                 help=("Give one unused instance name which is taken"
                       " to start the renaming sequence"),
                 metavar="<instance_name>"),
  cli.cli_option("-t", "--disk-template", dest="disk_template",
                 choices=list(constants.DISK_TEMPLATES),
                 default=constants.DT_DRBD8,
                 help="Disk template (diskless, file, plain or drbd) [drbd]"),
  cli.cli_option("-n", "--nodes", dest="nodes", default="",
                 help=("Comma separated list of nodes to perform"
                       " the burnin on (defaults to all nodes)"),
                 completion_suggest=cli.OPT_COMPL_MANY_NODES),
  cli.cli_option("-I", "--iallocator", dest="iallocator",
                 default=None, type="string",
                 help=("Perform the allocation using an iallocator"
                       " instead of fixed node spread (node restrictions no"
                       " longer apply, therefore -n/--nodes must not be"
                       " used)"),
                 completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR),
  cli.cli_option("-p", "--parallel", default=False, action="store_true",
                 dest="parallel",
                 help=("Enable parallelization of some operations in"
                       " order to speed burnin or to test granular locking")),
  cli.cli_option("--net-timeout", default=15, type="int",
                 dest="net_timeout",
                 help=("The instance check network timeout in seconds"
                       " (defaults to 15 seconds)"),
                 completion_suggest="15 60 300 900".split()),
  cli.cli_option("-C", "--http-check", default=False, action="store_true",
                 dest="http_check",
                 help=("Enable checking of instance status via http,"
                       " looking for /hostname.txt that should contain the"
                       " name of the instance")),
  cli.cli_option("-K", "--keep-instances", default=False,
                 action="store_true",
                 dest="keep_instances",
                 help=("Leave instances on the cluster after burnin,"
                       " for investigation in case of errors or simply"
                       " to use them")),
  ]

# Mainly used for bash completion
ARGUMENTS = [cli.ArgInstance(min=1)]
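
# An illustrative invocation (the OS and instance names are examples
# only) could look like:
#
#   burnin -o debootstrap -t drbd -p inst1.example.com inst2.example.com
#
# which would burn two DRBD-based instances in parallel using the
# "debootstrap" OS definition.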


def _DoCheckInstances(fn):
  """Decorator for checking instances.

  """
  def wrapper(self, *args, **kwargs):
    val = fn(self, *args, **kwargs)
    for instance in self.instances:
      self._CheckInstanceAlive(instance)
    return val

  return wrapper


def _DoBatch(retry):
  """Decorator for possible batch operations.

  Must come after the _DoCheckInstances decorator (if any).

  @param retry: whether this is a retryable batch, will be
      passed to StartBatch

  """
  def wrap(fn):
    def batched(self, *args, **kwargs):
      self.StartBatch(retry)
      val = fn(self, *args, **kwargs)
      self.CommitQueue()
      return val
    return batched

  return wrap
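
# For example, a retryable batch whose instances must be verified
# afterwards is declared with _DoCheckInstances outermost:
#
#   @_DoCheckInstances
#   @_DoBatch(True)
#   def BurnSomething(self):
#     ...
#
# so that the liveness checks run only after the whole batch has been
# committed.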


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self.url_opener = SimpleOpener()
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.queued_ops = []
    self.opts = None
    self.queue_retry = False
    self.disk_count = self.disk_growth = self.disk_size = None
    self.hvp = self.bep = None
    self.ParseOptions()
    self.cl = cli.GetClient()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
    formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
    self._feed_buf.write(formatted_msg + "\n")
    if self.opts.verbose:
      Log(formatted_msg, indent=3)
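
  # The msg argument received above is a job feedback tuple as passed to
  # cli.PollJob's feedback_fn: msg[0] is a timestamp tuple decoded by
  # utils.MergeTime and msg[2] is the actual message text.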

  def MaybeRetry(self, retry_count, msg, fn, *args):
    """Possibly retry a given function execution.

    @type retry_count: int
    @param retry_count: retry counter:
        - 0: non-retryable action
        - 1: last retry for a retryable action
        - MAX_RETRIES: original try for a retryable action
    @type msg: str
    @param msg: the kind of the operation
    @type fn: callable
    @param fn: the function to be called

    """
    try:
      val = fn(*args)
      if retry_count > 0 and retry_count < MAX_RETRIES:
        Log("Idempotent %s succeeded after %d retries" %
            (msg, MAX_RETRIES - retry_count))
      return val
    except Exception, err:
      if retry_count == 0:
        Log("Non-idempotent %s failed, aborting" % (msg, ))
        raise
      elif retry_count == 1:
        Log("Idempotent %s repeated failure, aborting" % (msg, ))
        raise
      else:
        Log("Idempotent %s failed, retry #%d/%d: %s" %
            (msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err))
        return self.MaybeRetry(retry_count - 1, msg, fn, *args)
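
  # In other words, a retryable call starts at retry_count == MAX_RETRIES
  # (3) and is attempted up to three times in total, while retry_count ==
  # 0 marks a non-retryable call whose first failure is immediately
  # fatal.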

  def _ExecOp(self, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    job_id = cli.SendJob(ops, cl=self.cl)
    results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
    if len(ops) == 1:
      return results[0]
    else:
      return results

  def ExecOp(self, retry, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    if retry:
      rval = MAX_RETRIES
    else:
      rval = 0
    return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops)

  def ExecOrQueue(self, name, *ops):
    """Execute one or more opcodes, or queue them if running in parallel."""
    if self.opts.parallel:
      self.queued_ops.append((ops, name))
    else:
      return self.ExecOp(self.queue_retry, *ops)

  def StartBatch(self, retry):
    """Start a new batch of jobs.

    @param retry: whether this is a retryable batch

    """
    self.queued_ops = []
    self.queue_retry = retry

  def CommitQueue(self):
    """Execute all submitted opcodes in case of parallel burnin"""
    if not self.opts.parallel:
      return

    if self.queue_retry:
      rval = MAX_RETRIES
    else:
      rval = 0

    try:
      results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
                                self.queued_ops)
    finally:
      self.queued_ops = []
    return results

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results if all jobs are
    successful; if any job fails, BurninFailure will be raised.

    """
    self.ClearFeedbackBuf()
    job_ids = [cli.SendJob(row[0], cl=self.cl) for row in jobs]
    Log("Submitted job ID(s) %s" % utils.CommaJoin(job_ids), indent=1)
    results = []
    for jid, (_, iname) in zip(job_ids, jobs):
      Log("waiting for job %s for %s" % (jid, iname), indent=2)
      try:
        results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
      except Exception, err:
        Log("Job for %s failed: %s" % (iname, err))
    if len(results) != len(jobs):
      raise BurninFailure()
    return results
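
  # In parallel mode the burnin steps thus share one pattern, driven by
  # the _DoBatch decorator: StartBatch() resets the queue, each
  # ExecOrQueue() call contributes one (ops, name) jobset per instance,
  # and CommitQueue() submits them all via ExecJobSet and waits for the
  # results.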

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version=("%%prog (ganeti) %s" %
                                            constants.RELEASE_VERSION),
                                   option_list=OPTIONS)

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)

    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
      options.do_addremove_disks = False
    else:
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Err("Wrong disk sizes/growth combination")
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Err("Wrong disk count/disk template combination")

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Err("Give either the nodes option or the iallocator option, not both")

    if options.http_check and not options.name_check:
      Err("Can't enable HTTP checks without name checks")

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

    socket.setdefaulttimeout(options.net_timeout)

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name", "offline", "drained"],
                                names=names, use_locking=True)
      result = self.ExecOp(True, op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Err(msg, exit_code=err_code)
    self.nodes = [data[0] for data in result if not (data[1] or data[2])]

    op_diagnose = opcodes.OpDiagnoseOS(output_fields=["name", "valid",
                                                      "variants"], names=[])
    result = self.ExecOp(True, op_diagnose)

    if not result:
      Err("Can't get the OS list")

    found = False
    for (name, valid, variants) in result:
      if valid and self.opts.os in cli.CalculateOSNames(name, variants):
        found = True
        break

    if not found:
      Err("OS '%s' not found" % self.opts.os)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnCreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
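
    # The cycle/islice pairing rotates over the node list: with nodes
    # [n1, n2, n3] the generated (pnode, snode) pairs are (n1, n2),
    # (n2, n3), (n3, n1), ..., spreading primaries and secondaries
    # evenly across the cluster.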

    Log("Creating instances")
    for pnode, snode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        pnode = snode = None
        msg = "with iallocator %s" % self.opts.iallocator
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        msg = "on %s" % pnode
      else:
        msg = "on %s, %s" % (pnode, snode)

      Log(msg, indent=2)

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks=[{"size": size}
                                           for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=self.opts.ip_check,
                                    name_check=self.opts.name_check,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      self.ExecOrQueue(instance, op)
      self.to_rem.append(instance)

  @_DoBatch(False)
  def BurnGrowDisks(self):
    """Grow all the instance disks by the requested amount, if any."""
    Log("Growing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      for idx, growth in enumerate(self.disk_growth):
        if growth > 0:
          op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                  amount=growth, wait_for_sync=True)
          Log("increase disk/%s by %s MB" % (idx, growth), indent=2)
          self.ExecOrQueue(instance, op)

  @_DoBatch(True)
  def BurnReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    Log("Replacing disks on the same nodes")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      ops = []
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=[i for i in range(self.disk_count)])
        Log("run %s" % mode, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops)

  @_DoBatch(True)
  def BurnReplaceDisks2(self):
    """Replace secondary node."""
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG

    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        tnode = None
        msg = "with iallocator %s" % self.opts.iallocator
      else:
        msg = tnode
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[])
      Log("run %s %s" % (mode, msg), indent=2)
      self.ExecOrQueue(instance, op)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnFailover(self):
    """Failover the instances."""
    Log("Failing over instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)
      self.ExecOrQueue(instance, op)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnMove(self):
    """Move the instances."""
    Log("Moving instances")
    mytor = izip(islice(cycle(self.nodes), 1, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpMoveInstance(instance_name=instance,
                                  target_node=tnode)
      self.ExecOrQueue(instance, op)

  @_DoBatch(False)
  def BurnMigrate(self):
    """Migrate the instances."""
    Log("Migrating instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = opcodes.OpMigrateInstance(instance_name=instance, live=True,
                                      cleanup=False)

      op2 = opcodes.OpMigrateInstance(instance_name=instance, live=True,
                                      cleanup=True)
      Log("migration and migration cleanup", indent=2)
      self.ExecOrQueue(instance, op1, op2)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    Log("Exporting and re-importing instances")
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      # read the full name of the instance
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance], use_locking=True)
      full_name = self.ExecOp(False, nam_op)[0][0]

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("import from %s"
                          " with iallocator %s" %
                          (enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("import from %s to %s" %
                          (enode, pnode))
      else:
        import_log_msg = ("import from %s to %s, %s" %
                          (enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks=[{"size": size}
                                               for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=self.opts.ip_check,
                                        name_check=self.opts.name_check,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("export to node %s" % enode, indent=2)
      Log("remove instance", indent=2)
      Log(import_log_msg, indent=2)
      Log("remove export", indent=2)
      self.ExecOrQueue(instance, exp_op, rem_op, imp_op, erem_op)

  def StopInstanceOp(self, instance):
    """Stop given instance."""
    return opcodes.OpShutdownInstance(instance_name=instance)

  def StartInstanceOp(self, instance):
    """Start given instance."""
    return opcodes.OpStartupInstance(instance_name=instance, force=False)

  def RenameInstanceOp(self, instance, instance_new):
    """Rename instance."""
    return opcodes.OpRenameInstance(instance_name=instance,
                                    new_name=instance_new)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnStopStart(self):
    """Stop/start the instances."""
    Log("Stopping and starting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, op1, op2)

  @_DoBatch(False)
  def BurnRemove(self):
    """Remove the instances."""
    Log("Removing instances")
    for instance in self.to_rem:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      self.ExecOrQueue(instance, op)

  def BurnRename(self):
    """Rename the instances.

    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
    Log("Renaming instances")
    rename = self.opts.rename
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_stop1 = self.StopInstanceOp(instance)
      op_stop2 = self.StopInstanceOp(rename)
      op_rename1 = self.RenameInstanceOp(instance, rename)
      op_rename2 = self.RenameInstanceOp(rename, instance)
      op_start1 = self.StartInstanceOp(rename)
      op_start2 = self.StartInstanceOp(instance)
      self.ExecOp(False, op_stop1, op_rename1, op_start1)
      self._CheckInstanceAlive(rename)
      self.ExecOp(False, op_stop2, op_rename2, op_start2)
      self._CheckInstanceAlive(instance)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReinstall(self):
    """Reinstall the instances."""
    Log("Reinstalling instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = opcodes.OpReinstallInstance(instance_name=instance)
      Log("reinstall without passing the OS", indent=2)
      op3 = opcodes.OpReinstallInstance(instance_name=instance,
                                        os_type=self.opts.os)
      Log("reinstall specifying the OS", indent=2)
      op4 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, op1, op2, op3, op4)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReboot(self):
    """Reboot the instances."""
    Log("Rebooting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      ops = []
      for reboot_type in constants.REBOOT_TYPES:
        op = opcodes.OpRebootInstance(instance_name=instance,
                                      reboot_type=reboot_type,
                                      ignore_secondaries=False)
        Log("reboot with type '%s'" % reboot_type, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnActivateDisks(self):
    """Activate and deactivate disks of the instances."""
    Log("Activating/deactivating disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_start = self.StartInstanceOp(instance)
      op_act = opcodes.OpActivateInstanceDisks(instance_name=instance)
      op_deact = opcodes.OpDeactivateInstanceDisks(instance_name=instance)
      op_stop = self.StopInstanceOp(instance)
      Log("activate disks when online", indent=2)
      Log("activate disks when offline", indent=2)
      Log("deactivate disks (when offline)", indent=2)
      self.ExecOrQueue(instance, op_act, op_stop, op_act, op_deact, op_start)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnAddRemoveDisks(self):
    """Add and remove an extra disk for the instances."""
    Log("Adding and removing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_add = opcodes.OpSetInstanceParams(\
        instance_name=instance,
        disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
      op_rem = opcodes.OpSetInstanceParams(\
        instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
      op_stop = self.StopInstanceOp(instance)
      op_start = self.StartInstanceOp(instance)
      Log("adding a disk", indent=2)
      Log("removing last disk", indent=2)
      self.ExecOrQueue(instance, op_add, op_stop, op_rem, op_start)

  @_DoBatch(False)
  def BurnAddRemoveNICs(self):
    """Add and remove an extra NIC for the instances."""
    Log("Adding and removing NICs")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_add = opcodes.OpSetInstanceParams(\
        instance_name=instance, nics=[(constants.DDM_ADD, {})])
      op_rem = opcodes.OpSetInstanceParams(\
        instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
      Log("adding a NIC", indent=2)
      Log("removing last NIC", indent=2)
      self.ExecOrQueue(instance, op_add, op_rem)

  def _CheckInstanceAlive(self, instance):
    """Check if an instance is alive by doing http checks.

    This will try to retrieve the URL http://<instance>/hostname.txt
    and check that it contains the hostname of the instance. On I/O
    errors (connection refused, no route to host, etc.) we retry for
    up to net_timeout seconds before declaring the instance down.

    """
    if not self.opts.http_check:
      return
    end_time = time.time() + self.opts.net_timeout
    url = None
    while time.time() < end_time and url is None:
      try:
        url = self.url_opener.open("http://%s/hostname.txt" % instance)
      except IOError:
        # here we can have connection refused, no route to host, etc.
        time.sleep(1)
    if url is None:
      raise InstanceDown(instance, "Cannot contact instance")
    hostname = url.read().strip()
    url.close()
    if hostname != instance:
      raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
                                    (instance, hostname)))
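
  # Note that the check above assumes each burnin instance runs an HTTP
  # server whose /hostname.txt document contains exactly the instance's
  # own name (cf. the -C/--http-check option).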

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Err("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")

    has_err = True
    try:
      self.BurnCreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.BurnReplaceDisks2()

      if (opts.disk_template != constants.DT_DISKLESS and
          utils.any(self.disk_growth, lambda n: n > 0)):
        self.BurnGrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnFailover()

      if opts.do_migrate and opts.disk_template == constants.DT_DRBD8:
        self.BurnMigrate()

      if (opts.do_move and len(self.nodes) > 1 and
          opts.disk_template in [constants.DT_PLAIN, constants.DT_FILE]):
        self.BurnMove()

      if (opts.do_importexport and
          opts.disk_template not in (constants.DT_DISKLESS,
                                     constants.DT_FILE)):
        self.BurnImportExport()

      if opts.do_reinstall:
        self.BurnReinstall()

      if opts.do_reboot:
        self.BurnReboot()

      if opts.do_addremove_disks:
        self.BurnAddRemoveDisks()

      if opts.do_addremove_nics:
        self.BurnAddRemoveNICs()

      if opts.do_activate_disks:
        self.BurnActivateDisks()

      if opts.rename:
        self.BurnRename()

      if opts.do_startstop:
        self.BurnStopStart()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      if not self.opts.keep_instances:
        try:
          self.BurnRemove()
        except Exception, err:
          if has_err: # already detected errors, so errors in removal
                      # are quite expected
            Log("Note: error detected during instance remove: %s" % str(err))
          else: # non-expected error
            raise

    return 0


def main():
  """Main function"""

  burner = Burner()
  return burner.BurninCluster()


if __name__ == "__main__":
  main()