Statistics
| Branch: | Tag: | Revision:

root / tools / burnin @ 90e722d1

History | View | Annotate | Download (20.9 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Burnin program
23

    
24
"""
25

    
26
import os
27
import sys
28
import optparse
29
import time
30
from itertools import izip, islice, cycle
31
from cStringIO import StringIO
32

    
33
from ganeti import opcodes
34
from ganeti import mcpu
35
from ganeti import constants
36
from ganeti import cli
37
from ganeti import errors
38
from ganeti import utils
39

    
40

    
41
# One-line usage synopsis; printed by Usage() and embedded in the
# optparse usage string built in Burner.ParseOptions.
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
42

    
43

    
44
def Usage():
  """Print a short usage synopsis on stderr and exit with code 2."""
  sys.stderr.write("Usage:\n")
  sys.stderr.write(USAGE + "\n")
  sys.exit(2)
50

    
51

    
52
def Log(msg):
  """Simple function that prints out its argument.

  The argument is written to stdout (via the Python 2 print statement,
  so non-strings are converted with str()) and stdout is flushed right
  away so progress stays visible even through pipes/buffers.

  """
  print msg
  sys.stdout.flush()
58

    
59

    
60
class Burner(object):
  """Burner class.

  Drives a full burnin run: parses the command line, reads the cluster
  state via opcodes, then creates, exercises and finally removes the
  test instances through the LUXI client obtained from cli.GetClient().
  """
62

    
63
  def __init__(self):
    """Constructor.

    Sets up logging and the LUXI client, then immediately parses the
    command line and queries the cluster state; both of those may call
    sys.exit() on error.
    """
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self._feed_buf = StringIO()  # accumulates opcode feedback messages
    self.nodes = []       # online node names, filled in by GetState()
    self.instances = []   # instance names to burn in (positional args)
    self.to_rem = []      # instances to be removed during cleanup
    self.opts = None      # parsed options, set by ParseOptions()
    self.cl = cli.GetClient()
    # order matters: ParseOptions() sets self.opts, which GetState() reads
    self.ParseOptions()
    self.GetState()
74

    
75
  def ClearFeedbackBuf(self):
    """Clear the feedback buffer.

    Called before each opcode/job submission so the buffer only ever
    holds the messages of the most recent execution.
    """
    self._feed_buf.truncate(0)
78

    
79
  def GetFeedbackBuf(self):
    """Return the contents of the buffer.

    Used by BurninCluster() to dump the accumulated opcode feedback
    when an error is detected.
    """
    return self._feed_buf.getvalue()
82

    
83
  def Feedback(self, msg):
    """Accumulate feedback in our buffer.

    msg is the standard feedback tuple: msg[0] is the timestamp (decoded
    with utils.MergeTime and rendered via time.ctime) and msg[2] the
    message text.

    """
    formatted = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
    self._feed_buf.write(formatted + "\n")
    if self.opts.verbose:
      # log the formatted text, not the raw tuple, so that verbose
      # output matches what ends up in the feedback buffer
      Log(formatted)
89

    
90
  def ExecOp(self, op):
    """Execute an opcode and manage the exec buffer.

    Clears the feedback buffer first, then submits the opcode through
    cli.SubmitOpCode using our client, routing feedback messages into
    self.Feedback.  Returns the opcode result.
    """
    self.ClearFeedbackBuf()
    return cli.SubmitOpCode(op, feedback_fn=self.Feedback, cl=self.cl)
94

    
95
  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    Returns the list of per-job results if every job succeeds;
    otherwise OpExecError is raised from within cli.py while polling.

    """
    self.ClearFeedbackBuf()
    submitted = [cli.SendJob(job, cl=self.cl) for job in jobs]
    Log("- Submitted job IDs %s" % ", ".join(submitted))
    results = []
    for job_id in submitted:
      Log("- Waiting for job %s" % job_id)
      results.append(cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback))
    return results
112

    
113
  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    Side effects: sets self.opts, self.instances, self.disk_size,
    self.disk_growth, self.disk_count, self.bep and self.hvp.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--disk-size", dest="disk_size",
                      help="Disk size (determines disk count)",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--disk-growth", dest="disk_growth", help="Disk growth",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--mem-size", dest="mem_size", help="Memory size",
                      default=128, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
                      default=True)
    parser.add_option("--no-reinstall", dest="do_reinstall",
                      help="Skip instance reinstall", action="store_false",
                      default=True)
    parser.add_option("--no-reboot", dest="do_reboot",
                      help="Skip instance reboot", action="store_false",
                      default=True)
    parser.add_option("--no-activate-disks", dest="do_activate_disks",
                      help="Skip disk activation/deactivation",
                      action="store_false", default=True)
    parser.add_option("--no-nics", dest="nics",
                      help="No network interfaces", action="store_const",
                      const=[], default=[{}])
    parser.add_option("--rename", dest="rename", default=None,
                      help="Give one unused instance name which is taken"
                           " to start the renaming sequence",
                      metavar="<instance_name>")
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("diskless", "file", "plain", "drbd"),
                      default="drbd",
                      help="Disk template (diskless, file, plain or drbd)"
                            " [drbd]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")
    parser.add_option("--iallocator", dest="iallocator",
                      default=None, type="string",
                      help="Perform the allocation using an iallocator"
                      " instead of fixed node spread (node restrictions no"
                      " longer apply, therefore -n/--nodes must not be used)")
    parser.add_option("-p", "--parallel", default=False, action="store_true",
                      dest="parallel",
                      help="Enable parallelization of some operations in"
                      " order to speed burnin or to test granular locking")

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
    else:
      # one size entry per disk; growth list must be the same length
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Log("Wrong disk sizes/growth combination")
        sys.exit(1)
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Log("Wrong disk count/disk template combination")
      sys.exit(1)

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Log("Give either the nodes option or the iallocator option, not both")
      sys.exit(1)

    self.opts = options
    self.instances = args
    # backend parameters shared by all instance creations
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}
229

    
230
  def GetState(self):
    """Read the cluster state from the config.

    Fills self.nodes with the names of the online nodes (restricted to
    -n/--nodes if given) and validates that the requested OS exists and
    is valid on the cluster; exits the program on any failure.
    """
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []  # empty list means "query all nodes"
    try:
      op = opcodes.OpQueryNodes(output_fields=["name", "offline"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Log(msg)
      sys.exit(err_code)
    # keep only online nodes (data[1] is the "offline" flag)
    self.nodes = [data[0] for data in result if not data[1]]

    result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
                                              names=[]))

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # filter non-valid OS-es
    os_set = [val[0] for val in result if val[1]]

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)
258

    
259
  def CreateInstances(self):
    """Create the given instances.

    Primary/secondary nodes are assigned round-robin over self.nodes
    (secondary is the next node after the primary), unless an
    iallocator or a non-mirrored disk template is used.  Every created
    instance is recorded in self.to_rem for later cleanup.

    """
    self.to_rem = []
    # (pnode, snode, instance) triples; cycle() makes the node lists
    # wrap around when there are more instances than nodes
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    jobset = []

    for pnode, snode, instance in mytor:
      if self.opts.iallocator:
        pnode = snode = None  # node choice delegated to the iallocator
        Log("- Add instance %s (iallocator: %s)" %
              (instance, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None  # non-mirrored templates have no secondary
        Log("- Add instance %s on node %s" % (instance, pnode))
      else:
        Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks = [ {"size": size}
                                              for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      if self.opts.parallel:
        # collect one-opcode jobs and submit them all at the end
        jobset.append([op])
        # FIXME: here we should not append to to_rem unconditionally,
        # but only when the job is successful
        self.to_rem.append(instance)
      else:
        self.ExecOp(op)
        self.to_rem.append(instance)
    if self.opts.parallel:
      self.ExecJobSet(jobset)
309

    
310
  def GrowDisks(self):
    """Grow each instance disk by its configured growth amount, if any.

    Disks whose growth amount is not positive are skipped.
    """
    for name in self.instances:
      for disk_idx, amount in enumerate(self.disk_growth):
        if amount <= 0:
          continue
        grow_op = opcodes.OpGrowDisk(instance_name=name, disk=disk_idx,
                                     amount=amount, wait_for_sync=True)
        Log("- Increase %s's disk/%s by %s MB" % (name, disk_idx, amount))
        self.ExecOp(grow_op)
319

    
320
  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8.

    Runs a replace-disks in secondary mode and then in primary mode for
    every instance, covering all disks.
    """
    for name in self.instances:
      for mode in (constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI):
        replace_op = opcodes.OpReplaceDisks(instance_name=name,
                                            mode=mode,
                                            disks=list(range(self.disk_count)))
        Log("- Replace disks (%s) for instance %s" % (mode, name))
        self.ExecOp(replace_op)
329

    
330
  def ReplaceDisks2(self):
    """Replace the secondary node of every instance.

    The new secondary is chosen round-robin two positions ahead in the
    node list, unless an iallocator does the choosing.
    """
    mode = constants.REPLACE_DISK_CHG

    candidates = islice(cycle(self.nodes), 2, None)
    for new_sec, name in izip(candidates, self.instances):
      if self.opts.iallocator:
        new_sec = None  # let the iallocator pick the node
      replace_op = opcodes.OpReplaceDisks(instance_name=name,
                                          mode=mode,
                                          remote_node=new_sec,
                                          iallocator=self.opts.iallocator,
                                          disks=list(range(self.disk_count)))
      Log("- Replace secondary (%s) for instance %s" % (mode, name))
      self.ExecOp(replace_op)
346

    
347
  def Failover(self):
    """Fail over every instance (without ignoring disk consistency)."""
    for name in self.instances:
      Log("- Failover instance %s" % (name))
      self.ExecOp(opcodes.OpFailoverInstance(instance_name=name,
                                             ignore_consistency=False))
356

    
357
  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    For each instance: export to a third node, remove the instance,
    re-create it by importing from that export, then delete the export.
    Node roles (primary/secondary/export) are assigned round-robin.

    """
    # (pnode, snode, enode, instance): primary, secondary and export node
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:

      if self.opts.iallocator:
        pnode = snode = None  # node choice delegated to the iallocator
        import_log_msg = ("- Import instance %s from node %s"
                          " (iallocator: %s)" %
                          (instance, enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None  # non-mirrored templates have no secondary
        import_log_msg = ("- Import instance %s from node %s to node %s" %
                          (instance, enode, pnode))
      else:
        import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
                          (instance, enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                           target_node=enode,
                                           shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      # resolve the instance's full name: the export directory on the
      # export node is keyed by it
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                           names=[instance])
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks = [ {"size": size}
                                                  for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % (instance))
      self.ExecOp(rem_op)
      # the instance no longer exists, so take it out of the cleanup
      # list until the import re-creates it
      self.to_rem.remove(instance)
      Log(import_log_msg)
      self.ExecOp(imp_op)
      Log("- Remove export of instance %s" % (instance))
      self.ExecOp(erem_op)

      self.to_rem.append(instance)
424

    
425
  def StopInstance(self, instance):
    """Shut down the given instance."""
    Log("- Shutdown instance %s" % instance)
    self.ExecOp(opcodes.OpShutdownInstance(instance_name=instance))
430

    
431
  def StartInstance(self, instance):
    """Start the given instance (without forcing)."""
    Log("- Start instance %s" % instance)
    self.ExecOp(opcodes.OpStartupInstance(instance_name=instance, force=False))
436

    
437
  def RenameInstance(self, instance, instance_new):
    """Rename the given instance to instance_new."""
    Log("- Rename instance %s to %s" % (instance, instance_new))
    self.ExecOp(opcodes.OpRenameInstance(instance_name=instance,
                                         new_name=instance_new))
443

    
444
  def StopStart(self):
    """Cycle (stop, then start again) every instance."""
    for name in self.instances:
      self.StopInstance(name)
      self.StartInstance(name)
449

    
450
  def Remove(self):
    """Remove all instances recorded in self.to_rem, ignoring failures."""
    for name in self.to_rem:
      Log("- Remove instance %s" % name)
      self.ExecOp(opcodes.OpRemoveInstance(instance_name=name,
                                           ignore_failures=True))
457

    
458
  def Rename(self):
    """Rename each instance to the spare name (--rename) and back.

    Instances are stopped before each rename and restarted afterwards.
    """
    spare = self.opts.rename
    for name in self.instances:
      self.StopInstance(name)
      self.RenameInstance(name, spare)
      self.StartInstance(spare)
      self.StopInstance(spare)
      self.RenameInstance(spare, name)
      self.StartInstance(name)
468

    
469
  def Reinstall(self):
    """Reinstall every instance, once implicitly and once with an OS.

    Each instance is stopped, reinstalled twice (first without and then
    with an explicit os_type) and started again.
    """
    for name in self.instances:
      self.StopInstance(name)
      Log("- Reinstall instance %s without passing the OS" % (name,))
      self.ExecOp(opcodes.OpReinstallInstance(instance_name=name))
      Log("- Reinstall instance %s specifying the OS" % (name,))
      self.ExecOp(opcodes.OpReinstallInstance(instance_name=name,
                                              os_type=self.opts.os))
      self.StartInstance(name)
481

    
482
  def Reboot(self):
    """Reboot every instance, once per supported reboot type."""
    for name in self.instances:
      for rtype in constants.REBOOT_TYPES:
        Log("- Reboot instance %s with type '%s'" % (name, rtype))
        self.ExecOp(opcodes.OpRebootInstance(instance_name=name,
                                             reboot_type=rtype,
                                             ignore_secondaries=False))
491

    
492
  def ActivateDisks(self):
    """Exercise disk activation/deactivation in both instance states.

    Activates disks while the instance is online, then again while it
    is offline, deactivates them, and restarts the instance.
    """
    for name in self.instances:
      activate = opcodes.OpActivateInstanceDisks(instance_name=name)
      deactivate = opcodes.OpDeactivateInstanceDisks(instance_name=name)
      Log("- Activate disks of online instance %s" % (name,))
      self.ExecOp(activate)
      self.StopInstance(name)
      Log("- Activate disks of offline instance %s" % (name,))
      self.ExecOp(activate)
      Log("- Deactivate disks of offline instance %s" % (name,))
      self.ExecOp(deactivate)
      self.StartInstance(name)
505

    
506
  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    Returns 0 on success; configuration errors exit the process.

    """
    opts = self.opts

    Log("- Testing global parameters")

    # a single node cannot host mirrored (drbd) instances
    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Log("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")
      sys.exit(1)

    # has_err stays True unless the whole sequence below completes; the
    # finally clause uses it to decide whether to dump the opcode buffer
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.ReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR) :
        self.ReplaceDisks2()

      if opts.disk_template != constants.DT_DISKLESS:
        self.GrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.Failover()

      if (opts.do_importexport and
          opts.disk_template not in (constants.DT_DISKLESS,
                                     constants.DT_FILE)):
        self.ImportExport()

      if opts.do_reinstall:
        self.Reinstall()

      if opts.do_reboot:
        self.Reboot()

      if opts.do_activate_disks:
        self.ActivateDisks()

      if opts.do_startstop:
        self.StopStart()

      if opts.rename:
        self.Rename()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      # always try to clean up the created instances
      self.Remove()

    return 0
569

    
570

    
571
def main():
  """Entry point: build a Burner and run the full burnin sequence."""
  return Burner().BurninCluster()
576

    
577

    
578
if __name__ == "__main__":
  # main()'s return value (always 0) is discarded; error paths exit
  # earlier via sys.exit, so falling off the end also yields exit code 0
  main()