root / tools / burnin @ 5c22d16e

#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

Stress-test tool for a Ganeti cluster: it creates a set of instances and
then runs a series of operations (disk replacement, failover,
export/import, reboot, rename, ...) against them.

"""

import os
import sys
import optparse
import time
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import mcpu
from ganeti import constants
from ganeti import cli
from ganeti import errors
from ganeti import utils


USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")


def Usage():
  """Shows program usage information and exits the program."""

  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)


def Log(msg):
  """Simple function that prints out its argument.

  """
  print msg
  sys.stdout.flush()


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.opts = None
    self.cl = cli.GetClient()
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
    self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])),
                                      msg[2]))
    if self.opts.verbose:
      Log(msg)

  def ExecOp(self, op):
    """Execute an opcode and manage the exec buffer."""
    self.ClearFeedbackBuf()
    return cli.SubmitOpCode(op, feedback_fn=self.Feedback, cl=self.cl)

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
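    # Submit all the jobs first so they can run concurrently, then poll
    # them one by one for their results.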
    job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
    Log("- Submitted job IDs %s" % ", ".join(job_ids))
    results = []
    for jid in job_ids:
      Log("- Waiting for job %s" % jid)
      results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))

    return results

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--disk-size", dest="disk_size",
                      help="Disk size (determines disk count)",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--disk-growth", dest="disk_growth", help="Disk growth",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--mem-size", dest="mem_size", help="Memory size",
                      default=128, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
                      default=True)
    parser.add_option("--no-reinstall", dest="do_reinstall",
                      help="Skip instance reinstall", action="store_false",
                      default=True)
    parser.add_option("--no-reboot", dest="do_reboot",
                      help="Skip instance reboot", action="store_false",
                      default=True)
    parser.add_option("--no-activate-disks", dest="do_activate_disks",
                      help="Skip disk activation/deactivation",
                      action="store_false", default=True)
    parser.add_option("--no-add-disks", dest="do_addremove_disks",
                      help="Skip disk addition/removal",
                      action="store_false", default=True)
    parser.add_option("--no-add-nics", dest="do_addremove_nics",
                      help="Skip NIC addition/removal",
                      action="store_false", default=True)
    parser.add_option("--no-nics", dest="nics",
                      help="No network interfaces", action="store_const",
                      const=[], default=[{}])
    parser.add_option("--rename", dest="rename", default=None,
                      help="Give one unused instance name which is taken"
                           " to start the renaming sequence",
                      metavar="<instance_name>")
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("diskless", "file", "plain", "drbd"),
                      default="drbd",
                      help="Disk template (diskless, file, plain or drbd)"
                            " [drbd]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")
    parser.add_option("--iallocator", dest="iallocator",
                      default=None, type="string",
                      help="Perform the allocation using an iallocator"
                      " instead of fixed node spread (node restrictions no"
                      " longer apply, therefore -n/--nodes must not be used)")
    parser.add_option("-p", "--parallel", default=False, action="store_true",
                      dest="parallel",
                      help="Enable parallelization of some operations in"
                      " order to speed burnin or to test granular locking")

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
      options.do_addremove_disks = False
    else:
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Log("Wrong disk sizes/growth combination")
        sys.exit(1)
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Log("Wrong disk count/disk template combination")
      sys.exit(1)

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Log("Give either the nodes option or the iallocator option, not both")
      sys.exit(1)

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name", "offline"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Log(msg)
      sys.exit(err_code)
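    # keep only the online nodes ("offline" is the second queried field)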
    self.nodes = [data[0] for data in result if not data[1]]

    result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
                                              names=[]))

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # filter non-valid OS-es
    os_set = [val[0] for val in result if val[1]]

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
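    # Pair every instance with a (primary, secondary) node tuple by walking
    # the node list round-robin, so the load is spread over all nodes.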
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    jobset = []

    for pnode, snode, instance in mytor:
      if self.opts.iallocator:
        pnode = snode = None
        Log("- Add instance %s (iallocator: %s)" %
              (instance, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        Log("- Add instance %s on node %s" % (instance, pnode))
      else:
        Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks=[{"size": size}
                                           for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      if self.opts.parallel:
        jobset.append([op])
        # FIXME: here we should not append to to_rem unconditionally,
        # but only when the job is successful
        self.to_rem.append(instance)
      else:
        self.ExecOp(op)
        self.to_rem.append(instance)
    if self.opts.parallel:
      self.ExecJobSet(jobset)

  def GrowDisks(self):
    """Grow the instance disks by the requested amount, if any."""
    for instance in self.instances:
      for idx, growth in enumerate(self.disk_growth):
        if growth > 0:
          op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                  amount=growth, wait_for_sync=True)
          Log("- Increase %s's disk/%s by %s MB" % (instance, idx, growth))
          self.ExecOp(op)

  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    for instance in self.instances:
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=[i for i in range(self.disk_count)])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace secondary node."""
    mode = constants.REPLACE_DISK_CHG
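
    # The new secondary is taken two positions further along the node list,
    # so with more than two nodes it differs from the nodes the instance
    # was created on.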
    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      if self.opts.iallocator:
        tnode = None
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[i for i in range(self.disk_count)])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances."""

    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % (instance))
      self.ExecOp(op)

  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("- Import instance %s from node %s"
                          " (iallocator: %s)" %
                          (instance, enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("- Import instance %s from node %s to node %s" %
                          (instance, enode, pnode))
      else:
        import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
                          (instance, enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance])
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks=[{"size": size}
                                               for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % (instance))
      self.ExecOp(rem_op)
      self.to_rem.remove(instance)
      Log(import_log_msg)
      self.ExecOp(imp_op)
      Log("- Remove export of instance %s" % (instance))
      self.ExecOp(erem_op)

      self.to_rem.append(instance)

  def StopInstance(self, instance):
    """Stop given instance."""
    op = opcodes.OpShutdownInstance(instance_name=instance)
    Log("- Shutdown instance %s" % instance)
    self.ExecOp(op)

  def StartInstance(self, instance):
    """Start given instance."""
    op = opcodes.OpStartupInstance(instance_name=instance, force=False)
    Log("- Start instance %s" % instance)
    self.ExecOp(op)

  def RenameInstance(self, instance, instance_new):
    """Rename instance."""
    op = opcodes.OpRenameInstance(instance_name=instance,
                                  new_name=instance_new)
    Log("- Rename instance %s to %s" % (instance, instance_new))
    self.ExecOp(op)

  def StopStart(self):
    """Stop/start the instances."""
    for instance in self.instances:
      self.StopInstance(instance)
      self.StartInstance(instance)

  def Remove(self):
    """Remove the instances."""
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)

  def Rename(self):
    """Rename the instances."""
    rename = self.opts.rename
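    # Rename each instance to the spare name and back again, stopping it
    # before each rename and starting it again afterwards.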
    for instance in self.instances:
      self.StopInstance(instance)
      self.RenameInstance(instance, rename)
      self.StartInstance(rename)
      self.StopInstance(rename)
      self.RenameInstance(rename, instance)
      self.StartInstance(instance)

  def Reinstall(self):
    """Reinstall the instances."""
    for instance in self.instances:
      self.StopInstance(instance)
      op = opcodes.OpReinstallInstance(instance_name=instance)
      Log("- Reinstall instance %s without passing the OS" % (instance,))
      self.ExecOp(op)
      op = opcodes.OpReinstallInstance(instance_name=instance,
                                       os_type=self.opts.os)
      Log("- Reinstall instance %s specifying the OS" % (instance,))
      self.ExecOp(op)
      self.StartInstance(instance)

  def Reboot(self):
    """Reboot the instances."""
    for instance in self.instances:
      for reboot_type in constants.REBOOT_TYPES:
        op = opcodes.OpRebootInstance(instance_name=instance,
                                      reboot_type=reboot_type,
                                      ignore_secondaries=False)
        Log("- Reboot instance %s with type '%s'" % (instance, reboot_type))
        self.ExecOp(op)

  def ActivateDisks(self):
    """Activate and deactivate disks of the instances."""
    for instance in self.instances:
      op_act = opcodes.OpActivateInstanceDisks(instance_name=instance)
      op_deact = opcodes.OpDeactivateInstanceDisks(instance_name=instance)
      Log("- Activate disks of online instance %s" % (instance,))
      self.ExecOp(op_act)
      self.StopInstance(instance)
      Log("- Activate disks of offline instance %s" % (instance,))
      self.ExecOp(op_act)
      Log("- Deactivate disks of offline instance %s" % (instance,))
      self.ExecOp(op_deact)
      self.StartInstance(instance)

  def AddRemoveDisks(self):
    """Add and remove an extra disk for the instances."""
    for instance in self.instances:
      op_add = opcodes.OpSetInstanceParams(
        instance_name=instance,
        disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
      op_rem = opcodes.OpSetInstanceParams(
        instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
      Log("- Adding a disk to instance %s" % (instance,))
      self.ExecOp(op_add)
      self.StopInstance(instance)
      Log("- Removing the last disk of instance %s" % (instance,))
      self.ExecOp(op_rem)
      self.StartInstance(instance)

  def AddRemoveNICs(self):
    """Add and remove an extra NIC for the instances."""
    for instance in self.instances:
      op_add = opcodes.OpSetInstanceParams(
        instance_name=instance, nics=[(constants.DDM_ADD, {})])
      op_rem = opcodes.OpSetInstanceParams(
        instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
      Log("- Adding a NIC to instance %s" % (instance,))
      self.ExecOp(op_add)
      Log("- Removing the last NIC of instance %s" % (instance,))
      self.ExecOp(op_rem)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("- Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Log("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")
      sys.exit(1)
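
    # Run the whole sequence inside try/finally so the created instances
    # are removed even if a step fails; on error, dump the accumulated
    # opcode feedback buffer first.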
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.ReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.ReplaceDisks2()

      if opts.disk_template != constants.DT_DISKLESS:
        self.GrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.Failover()

      if (opts.do_importexport and
          opts.disk_template not in (constants.DT_DISKLESS,
                                     constants.DT_FILE)):
        self.ImportExport()

      if opts.do_reinstall:
        self.Reinstall()

      if opts.do_reboot:
        self.Reboot()

      if opts.do_addremove_disks:
        self.AddRemoveDisks()

      if opts.do_addremove_nics:
        self.AddRemoveNICs()

      if opts.do_activate_disks:
        self.ActivateDisks()

      if opts.do_startstop:
        self.StopStart()

      if opts.rename:
        self.Rename()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0


def main():
  """Main function"""

  burner = Burner()
  return burner.BurninCluster()


if __name__ == "__main__":
  sys.exit(main())