
tools / burnin @ 08db7c5c


#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

This tool stress-tests a Ganeti cluster: it creates a set of instances
and runs a sequence of operations on them (disk replacement, disk
growth, failover, export/import, stop/start and rename).

"""

import os
import sys
import optparse
import time
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import mcpu
from ganeti import constants
from ganeti import cli
from ganeti import errors
from ganeti import utils


USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
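# Example invocation (OS, node and instance names below are placeholders):
#   ./burnin -o debian-etch -t drbd -n node1,node2 instance1 instance2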


def Usage():
  """Shows program usage information and exits the program."""

  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)


def Log(msg):
  """Simple function that prints out its argument.

  """
  print msg
  sys.stdout.flush()


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.opts = None
    self.cl = cli.GetClient()
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
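    # msg is a tuple from the job feedback callbacks: msg[0] is a split
    # timestamp (joined back via utils.MergeTime) and msg[2] is the text.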
    self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])),
                                      msg[2]))
    if self.opts.verbose:
      Log(msg)

  def ExecOp(self, op):
    """Execute an opcode and manage the exec buffer."""
    self.ClearFeedbackBuf()
    return cli.SubmitOpCode(op, feedback_fn=self.Feedback, cl=self.cl)

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
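    # Submit all jobs up front, then poll them one by one; the master can
    # process them concurrently, which is what -p/--parallel relies on.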
    job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
    Log("- Submitted job IDs %s" % ", ".join(job_ids))
    results = []
    for jid in job_ids:
      Log("- Waiting for job %s" % jid)
      results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))

    return results

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--disk-size", dest="disk_size",
                      help="Disk size (determines disk count)",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--disk-growth", dest="disk_growth", help="Disk growth",
                      default="128", type="string", metavar="<size,size,...>")
    parser.add_option("--mem-size", dest="mem_size", help="Memory size",
                      default=128, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
                      default=True)
    parser.add_option("--rename", dest="rename", default=None,
                      help="Give one unused instance name which is taken"
                           " to start the renaming sequence",
                      metavar="<instance_name>")
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("diskless", "file", "plain", "drbd"),
                      default="drbd",
                      help="Disk template (diskless, file, plain or drbd)"
                           " [drbd]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")
    parser.add_option("--iallocator", dest="iallocator",
                      default=None, type="string",
                      help="Perform the allocation using an iallocator"
                      " instead of fixed node spread (node restrictions no"
                      " longer apply, therefore -n/--nodes must not be used)")
    parser.add_option("-p", "--parallel", default=False, action="store_true",
                      dest="parallel",
                      help="Enable parallelization of some operations in"
                      " order to speed burnin or to test granular locking")

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

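    # --disk-size and --disk-growth are comma-separated lists; the number of
    # entries in --disk-size determines how many disks each instance gets
    # (e.g. "10g,2g" means two disks), and the growth list must match it.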
    disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
    disk_growth = [utils.ParseUnit(v) for v in options.disk_growth.split(",")]
    if len(disk_growth) != len(disk_size):
      Log("Wrong disk sizes/growth combination")
      sys.exit(1)
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Log("Wrong disk count/disk template combination")
      sys.exit(1)

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Log("Give either the nodes option or the iallocator option, not both")
      sys.exit(1)

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Log(msg)
      sys.exit(err_code)
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
                                              names=[]))

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # filter out the invalid OSes
    os_set = [val[0] for val in result if val[1]]

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
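    # Walk the node list cyclically: each instance gets a primary node and
    # the next node in the list as its secondary candidate.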
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    jobset = []

    for pnode, snode, instance in mytor:
      if self.opts.iallocator:
        pnode = snode = None
        Log("- Add instance %s (iallocator: %s)" %
            (instance, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        Log("- Add instance %s on node %s" % (instance, pnode))
      else:
        Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks=[{"size": size}
                                           for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=[{}],
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

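      # In parallel mode each creation becomes a single-opcode job and the
      # whole set is submitted at once via ExecJobSet after the loop.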
      if self.opts.parallel:
        jobset.append([op])
        # FIXME: here we should not append to to_rem unconditionally,
        # but only when the job is successful
        self.to_rem.append(instance)
      else:
        self.ExecOp(op)
        self.to_rem.append(instance)
    if self.opts.parallel:
      self.ExecJobSet(jobset)

  def GrowDisks(self):
    """Grow each of the instance disks by the requested amount, if any."""
    for instance in self.instances:
      for idx, growth in enumerate(self.disk_growth):
        if growth > 0:
          op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                  amount=growth, wait_for_sync=True)
          Log("- Increase %s's %s disk by %s MB" % (instance, idx, growth))
          self.ExecOp(op)

  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    for instance in self.instances:
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=[i for i in range(self.disk_count)])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace secondary node."""
    mode = constants.REPLACE_DISK_SEC

    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      if self.opts.iallocator:
        tnode = None
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[i for i in range(self.disk_count)])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances."""

    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % (instance))
      self.ExecOp(op)

  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    """

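    # Same cyclic node walk as in CreateInstances, with a third offset: the
    # extra node in each tuple is the one that will hold the export.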
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("- Import instance %s from node %s (iallocator: %s)" %
                          (instance, enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("- Import instance %s from node %s to node %s" %
                          (instance, enode, pnode))
      else:
        import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
                          (instance, enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance])
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      # build the import opcode with the same disk layout used at creation
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks=[{"size": size}
                                               for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=[{}],
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver=None,
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % (instance))
      self.ExecOp(rem_op)
      self.to_rem.remove(instance)
      Log(import_log_msg)
      self.ExecOp(imp_op)
      Log("- Remove export of instance %s" % (instance))
      self.ExecOp(erem_op)

      self.to_rem.append(instance)

  def StopInstance(self, instance):
    """Stop given instance."""
    op = opcodes.OpShutdownInstance(instance_name=instance)
    Log("- Shutdown instance %s" % instance)
    self.ExecOp(op)

  def StartInstance(self, instance):
    """Start given instance."""
    op = opcodes.OpStartupInstance(instance_name=instance, force=False)
    Log("- Start instance %s" % instance)
    self.ExecOp(op)

  def RenameInstance(self, instance, instance_new):
    """Rename instance."""
    op = opcodes.OpRenameInstance(instance_name=instance,
                                  new_name=instance_new)
    Log("- Rename instance %s to %s" % (instance, instance_new))
    self.ExecOp(op)

  def StopStart(self):
    """Stop/start the instances."""
    for instance in self.instances:
      self.StopInstance(instance)
      self.StartInstance(instance)

  def Remove(self):
    """Remove the instances."""
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)


  def Rename(self):
    """Rename the instances."""
    rename = self.opts.rename
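    # Each instance is renamed to the one spare name given via --rename and
    # then back again, so the original names are restored at the end.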
    for instance in self.instances:
      self.StopInstance(instance)
      self.RenameInstance(instance, rename)
      self.StartInstance(rename)
      self.StopInstance(rename)
      self.RenameInstance(rename, instance)
      self.StartInstance(instance)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("- Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Log("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")
      sys.exit(1)

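    # has_err is cleared only if every step below completes; otherwise the
    # finally clause dumps the buffered opcode feedback before cleanup.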
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.ReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.ReplaceDisks2()

      if opts.disk_template != constants.DT_DISKLESS:
        self.GrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.Failover()

      if opts.do_importexport:
        self.ImportExport()

      if opts.do_startstop:
        self.StopStart()

      if opts.rename:
        self.Rename()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0


def main():
  """Main function"""

  burner = Burner()
  return burner.BurninCluster()


if __name__ == "__main__":
  main()