#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""

import os
import sys
import optparse
import time
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import mcpu
from ganeti import constants
from ganeti import cli
from ganeti import errors
from ganeti import utils


USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")


def Usage():
  """Shows program usage information and exits the program."""

  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)


def Log(msg):
  """Simple function that prints out its argument.

  """
  print msg
  sys.stdout.flush()


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.opts = None
    self.cl = cli.GetClient()
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
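    # each msg is a (timestamp, log-type, text) tuple; we keep only the
    # merged timestamp and the text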
    self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])),
                                      msg[2]))
    if self.opts.verbose:
      Log(msg)

  def ExecOp(self, op):
    """Execute an opcode and manage the exec buffer."""
    self.ClearFeedbackBuf()
    return cli.SubmitOpCode(op, feedback_fn=self.Feedback, cl=self.cl)

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
    job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
    Log("- Submitted job IDs %s" % ", ".join(job_ids))
    results = []
    for jid in job_ids:
      Log("- Waiting for job %s" % jid)
      results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))

    return results

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--disk-size", dest="disk_size",
                      help="Disk size (determines disk count)",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--disk-growth", dest="disk_growth", help="Disk growth",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--mem-size", dest="mem_size", help="Memory size",
                      default=128, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
                      default=True)
    parser.add_option("--no-nics", dest="nics",
                      help="No network interfaces", action="store_const",
                      const=[], default=[{}])
    parser.add_option("--rename", dest="rename", default=None,
                      help="Give one unused instance name which is taken"
                           " to start the renaming sequence",
                      metavar="<instance_name>")
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("diskless", "file", "plain", "drbd"),
                      default="drbd",
                      help="Disk template (diskless, file, plain or drbd)"
                            " [drbd]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")
    parser.add_option("--iallocator", dest="iallocator",
                      default=None, type="string",
                      help="Perform the allocation using an iallocator"
                      " instead of fixed node spread (node restrictions no"
                      " longer apply, therefore -n/--nodes must not be used)")
    parser.add_option("-p", "--parallel", default=False, action="store_true",
                      dest="parallel",
                      help="Enable parallelization of some operations in"
                      " order to speed up burnin or to test granular locking")

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
    else:
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Log("Wrong disk sizes/growth combination")
        sys.exit(1)
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Log("Wrong disk count/disk template combination")
      sys.exit(1)

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Log("Give either the nodes option or the iallocator option, not both")
      sys.exit(1)

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Log(msg)
      sys.exit(err_code)
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
                                              names=[]))

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # filter out the OSes that are not marked valid
    os_set = [val[0] for val in result if val[1]]

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
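    # walk the (cycled) node list in pairs: each instance gets the next
    # node as primary and the one after it as secondary candidate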
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    jobset = []

    for pnode, snode, instance in mytor:
      if self.opts.iallocator:
        pnode = snode = None
        Log("- Add instance %s (iallocator: %s)" %
              (instance, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        Log("- Add instance %s on node %s" % (instance, pnode))
      else:
        Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks = [ {"size": size}
                                              for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      if self.opts.parallel:
        jobset.append([op])
        # FIXME: here we should not append to to_rem unconditionally,
        # but only when the job is successful
        self.to_rem.append(instance)
      else:
        self.ExecOp(op)
        self.to_rem.append(instance)
    if self.opts.parallel:
      self.ExecJobSet(jobset)

  def GrowDisks(self):
    """Grow the disks of all instances by the requested amounts, if any."""
    for instance in self.instances:
      for idx, growth in enumerate(self.disk_growth):
        if growth > 0:
          op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                  amount=growth, wait_for_sync=True)
          Log("- Increase %s's disk/%s by %s MB" % (instance, idx, growth))
          self.ExecOp(op)

  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    for instance in self.instances:
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=[i for i in range(self.disk_count)])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace secondary node."""
    mode = constants.REPLACE_DISK_SEC

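    # for each instance, pick as new secondary the node two positions
    # past its original primary in the cycled node list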
    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      if self.opts.iallocator:
        tnode = None
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[i for i in range(self.disk_count)])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances."""

    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % (instance))
      self.ExecOp(op)

  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    """

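    # walk the cycled node list three at a time: primary, secondary and
    # the node that will hold the export, one instance per step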
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("- Import instance %s from node %s (iallocator: %s)" %
                          (instance, enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("- Import instance %s from node %s to node %s" %
                          (instance, enode, pnode))
      else:
        import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
                          (instance, enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance])
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks = [ {"size": size}
                                                  for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % (instance))
      self.ExecOp(rem_op)
      self.to_rem.remove(instance)
      Log(import_log_msg)
      self.ExecOp(imp_op)
      Log("- Remove export of instance %s" % (instance))
      self.ExecOp(erem_op)

      self.to_rem.append(instance)

  def StopInstance(self, instance):
    """Stop given instance."""
    op = opcodes.OpShutdownInstance(instance_name=instance)
    Log("- Shutdown instance %s" % instance)
    self.ExecOp(op)

  def StartInstance(self, instance):
    """Start given instance."""
    op = opcodes.OpStartupInstance(instance_name=instance, force=False)
    Log("- Start instance %s" % instance)
    self.ExecOp(op)

  def RenameInstance(self, instance, instance_new):
    """Rename instance."""
    op = opcodes.OpRenameInstance(instance_name=instance,
                                  new_name=instance_new)
    Log("- Rename instance %s to %s" % (instance, instance_new))
    self.ExecOp(op)

  def StopStart(self):
    """Stop/start the instances."""
    for instance in self.instances:
      self.StopInstance(instance)
      self.StartInstance(instance)

  def Remove(self):
    """Remove the instances."""
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)

  def Rename(self):
    """Rename the instances."""
    rename = self.opts.rename
    for instance in self.instances:
      self.StopInstance(instance)
      self.RenameInstance(instance, rename)
      self.StartInstance(rename)
      self.StopInstance(rename)
      self.RenameInstance(rename, instance)
      self.StartInstance(instance)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("- Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Log("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")
      sys.exit(1)

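    # assume failure until the whole sequence completes; the finally
    # clause below dumps the feedback buffer on error and always removes
    # the instances that were created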
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.ReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.ReplaceDisks2()

      if opts.disk_template != constants.DT_DISKLESS:
        self.GrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.Failover()

      if opts.do_importexport:
        self.ImportExport()

      if opts.do_startstop:
        self.StopStart()

      if opts.rename:
        self.Rename()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0


def main():
  """Main function"""

  burner = Burner()
  return burner.BurninCluster()


if __name__ == "__main__":
  main()