Statistics
| Branch: | Tag: | Revision:

root / tools / burnin @ 82d9caef

History | View | Annotate | Download (17.8 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Burnin program
23

    
24
"""
25

    
26
import os
27
import sys
28
import optparse
29
import time
30
from itertools import izip, islice, cycle
31
from cStringIO import StringIO
32

    
33
from ganeti import opcodes
34
from ganeti import mcpu
35
from ganeti import constants
36
from ganeti import cli
37
from ganeti import errors
38
from ganeti import utils
39

    
40

    
41
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
42

    
43

    
44
def Usage():
  """Print the usage synopsis to stderr and exit with status 2."""
  sys.stderr.write("Usage:\n")
  sys.stderr.write(USAGE + "\n")
  sys.exit(2)
50

    
51

    
52
def Log(msg):
  """Write a message to stdout and flush immediately.

  Flushing after every message keeps the output ordered even when
  stdout is redirected to a file or pipe.

  """
  # the 1-tuple wrap makes this safe for tuple arguments too
  sys.stdout.write("%s\n" % (msg,))
  sys.stdout.flush()
58

    
59

    
60
class Burner(object):
61
  """Burner class."""
62

    
63
  def __init__(self):
64
    """Constructor."""
65
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
66
    self._feed_buf = StringIO()
67
    self.nodes = []
68
    self.instances = []
69
    self.to_rem = []
70
    self.opts = None
71
    self.cl = cli.GetClient()
72
    self.ParseOptions()
73
    self.GetState()
74

    
75
  def ClearFeedbackBuf(self):
76
    """Clear the feedback buffer."""
77
    self._feed_buf.truncate(0)
78

    
79
  def GetFeedbackBuf(self):
80
    """Return the contents of the buffer."""
81
    return self._feed_buf.getvalue()
82

    
83
  def Feedback(self, msg):
84
    """Acumulate feedback in our buffer."""
85
    self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])),
86
                                      msg[2]))
87
    if self.opts.verbose:
88
      Log(msg)
89

    
90
  def ExecOp(self, op):
91
    """Execute an opcode and manage the exec buffer."""
92
    self.ClearFeedbackBuf()
93
    return cli.SubmitOpCode(op, feedback_fn=self.Feedback, cl=self.cl)
94

    
95
  def ExecJobSet(self, jobs):
96
    """Execute a set of jobs and return once all are done.
97

    
98
    The method will return the list of results, if all jobs are
99
    successfull. Otherwise, OpExecError will be raised from within
100
    cli.py.
101

    
102
    """
103
    self.ClearFeedbackBuf()
104
    job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
105
    Log("- Submitted job IDs %s" % ", ".join(job_ids))
106
    results = []
107
    for jid in job_ids:
108
      Log("- Waiting for job %s" % jid)
109
      results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
110

    
111
    return results
112

    
113
  def ParseOptions(self):
114
    """Parses the command line options.
115

    
116
    In case of command line errors, it will show the usage and exit the
117
    program.
118

    
119
    """
120

    
121
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
122
                                   version="%%prog (ganeti) %s" %
123
                                   constants.RELEASE_VERSION,
124
                                   option_class=cli.CliOption)
125

    
126
    parser.add_option("-o", "--os", dest="os", default=None,
127
                      help="OS to use during burnin",
128
                      metavar="<OS>")
129
    parser.add_option("--os-size", dest="os_size", help="Disk size",
130
                      default=4 * 1024, type="unit", metavar="<size>")
131
    parser.add_option("--os-growth", dest="sda_growth", help="Disk growth",
132
                      default=1024, type="unit", metavar="<size>")
133
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
134
                      default=4 * 1024, type="unit", metavar="<size>")
135
    parser.add_option("--swap-growth", dest="sdb_growth", help="Swap growth",
136
                      default=1024, type="unit", metavar="<size>")
137
    parser.add_option("--mem-size", dest="mem_size", help="Memory size",
138
                      default=128, type="unit", metavar="<size>")
139
    parser.add_option("-v", "--verbose",
140
                      action="store_true", dest="verbose", default=False,
141
                      help="print command execution messages to stdout")
142
    parser.add_option("--no-replace1", dest="do_replace1",
143
                      help="Skip disk replacement with the same secondary",
144
                      action="store_false", default=True)
145
    parser.add_option("--no-replace2", dest="do_replace2",
146
                      help="Skip disk replacement with a different secondary",
147
                      action="store_false", default=True)
148
    parser.add_option("--no-failover", dest="do_failover",
149
                      help="Skip instance failovers", action="store_false",
150
                      default=True)
151
    parser.add_option("--no-importexport", dest="do_importexport",
152
                      help="Skip instance export/import", action="store_false",
153
                      default=True)
154
    parser.add_option("--no-startstop", dest="do_startstop",
155
                      help="Skip instance stop/start", action="store_false",
156
                      default=True)
157
    parser.add_option("--rename", dest="rename", default=None,
158
                      help="Give one unused instance name which is taken"
159
                           " to start the renaming sequence",
160
                      metavar="<instance_name>")
161
    parser.add_option("-t", "--disk-template", dest="disk_template",
162
                      choices=("diskless", "file", "plain", "drbd"),
163
                      default="drbd",
164
                      help="Disk template (diskless, file, plain or drbd)"
165
                            " [drbd]")
166
    parser.add_option("-n", "--nodes", dest="nodes", default="",
167
                      help="Comma separated list of nodes to perform"
168
                      " the burnin on (defaults to all nodes)")
169
    parser.add_option("--iallocator", dest="iallocator",
170
                      default=None, type="string",
171
                      help="Perform the allocation using an iallocator"
172
                      " instead of fixed node spread (node restrictions no"
173
                      " longer apply, therefore -n/--nodes must not be used")
174
    parser.add_option("-p", "--parallel", default=False, action="store_true",
175
                      dest="parallel",
176
                      help="Enable parallelization of some operations in"
177
                      " order to speed burnin or to test granular locking")
178

    
179
    options, args = parser.parse_args()
180
    if len(args) < 1 or options.os is None:
181
      Usage()
182

    
183
    supported_disk_templates = (constants.DT_DISKLESS,
184
                                constants.DT_FILE,
185
                                constants.DT_PLAIN,
186
                                constants.DT_DRBD8)
187
    if options.disk_template not in supported_disk_templates:
188
      Log("Unknown disk template '%s'" % options.disk_template)
189
      sys.exit(1)
190

    
191
    if options.nodes and options.iallocator:
192
      Log("Give either the nodes option or the iallocator option, not both")
193
      sys.exit(1)
194

    
195
    self.opts = options
196
    self.instances = args
197
    self.bep = {
198
      constants.BE_MEMORY: options.mem_size,
199
      constants.BE_VCPUS: 1,
200
      }
201
    self.hvp = {}
202

    
203
  def GetState(self):
204
    """Read the cluster state from the config."""
205
    if self.opts.nodes:
206
      names = self.opts.nodes.split(",")
207
    else:
208
      names = []
209
    try:
210
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
211
      result = self.ExecOp(op)
212
    except errors.GenericError, err:
213
      err_code, msg = cli.FormatError(err)
214
      Log(msg)
215
      sys.exit(err_code)
216
    self.nodes = [data[0] for data in result]
217

    
218
    result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
219
                                              names=[]))
220

    
221
    if not result:
222
      Log("Can't get the OS list")
223
      sys.exit(1)
224

    
225
    # filter non-valid OS-es
226
    os_set = [val[0] for val in result if val[1]]
227

    
228
    if self.opts.os not in os_set:
229
      Log("OS '%s' not found" % self.opts.os)
230
      sys.exit(1)
231

    
232
  def CreateInstances(self):
233
    """Create the given instances.
234

    
235
    """
236
    self.to_rem = []
237
    mytor = izip(cycle(self.nodes),
238
                 islice(cycle(self.nodes), 1, None),
239
                 self.instances)
240
    jobset = []
241

    
242
    for pnode, snode, instance in mytor:
243
      if self.opts.iallocator:
244
        pnode = snode = None
245
        Log("- Add instance %s (iallocator: %s)" %
246
              (instance, self.opts.iallocator))
247
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
248
        snode = None
249
        Log("- Add instance %s on node %s" % (instance, pnode))
250
      else:
251
        Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))
252

    
253
      op = opcodes.OpCreateInstance(instance_name=instance,
254
                                    disk_size=self.opts.os_size,
255
                                    swap_size=self.opts.swap_size,
256
                                    disk_template=self.opts.disk_template,
257
                                    mode=constants.INSTANCE_CREATE,
258
                                    os_type=self.opts.os,
259
                                    pnode=pnode,
260
                                    snode=snode,
261
                                    start=True,
262
                                    ip_check=True,
263
                                    wait_for_sync=True,
264
                                    mac="auto",
265
                                    file_driver="loop",
266
                                    file_storage_dir=None,
267
                                    iallocator=self.opts.iallocator,
268
                                    beparams=self.bep,
269
                                    hvparams=self.hvp,
270
                                    )
271

    
272
      if self.opts.parallel:
273
        jobset.append([op])
274
        # FIXME: here we should not append to to_rem uncoditionally,
275
        # but only when the job is successful
276
        self.to_rem.append(instance)
277
      else:
278
        self.ExecOp(op)
279
        self.to_rem.append(instance)
280
    if self.opts.parallel:
281
      self.ExecJobSet(jobset)
282

    
283
  def GrowDisks(self):
284
    """Grow both the os and the swap disks by the requested amount, if any."""
285
    for instance in self.instances:
286
      for disk in ['sda', 'sdb']:
287
        growth = getattr(self.opts, '%s_growth' % disk)
288
        if growth > 0:
289
          op = opcodes.OpGrowDisk(instance_name=instance, disk=disk,
290
                                  amount=growth, wait_for_sync=True)
291
          Log("- Increase %s's %s disk by %s MB" % (instance, disk, growth))
292
          self.ExecOp(op)
293

    
294
  def ReplaceDisks1D8(self):
295
    """Replace disks on primary and secondary for drbd8."""
296
    for instance in self.instances:
297
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
298
        op = opcodes.OpReplaceDisks(instance_name=instance,
299
                                    mode=mode,
300
                                    disks=["sda", "sdb"])
301
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
302
        self.ExecOp(op)
303

    
304
  def ReplaceDisks2(self):
305
    """Replace secondary node."""
306
    mode = constants.REPLACE_DISK_SEC
307

    
308
    mytor = izip(islice(cycle(self.nodes), 2, None),
309
                 self.instances)
310
    for tnode, instance in mytor:
311
      if self.opts.iallocator:
312
        tnode = None
313
      op = opcodes.OpReplaceDisks(instance_name=instance,
314
                                  mode=mode,
315
                                  remote_node=tnode,
316
                                  iallocator=self.opts.iallocator,
317
                                  disks=["sda", "sdb"])
318
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
319
      self.ExecOp(op)
320

    
321
  def Failover(self):
322
    """Failover the instances."""
323

    
324
    for instance in self.instances:
325
      op = opcodes.OpFailoverInstance(instance_name=instance,
326
                                      ignore_consistency=False)
327

    
328
      Log("- Failover instance %s" % (instance))
329
      self.ExecOp(op)
330

    
331
  def ImportExport(self):
332
    """Export the instance, delete it, and import it back.
333

    
334
    """
335

    
336
    mytor = izip(cycle(self.nodes),
337
                 islice(cycle(self.nodes), 1, None),
338
                 islice(cycle(self.nodes), 2, None),
339
                 self.instances)
340

    
341
    for pnode, snode, enode, instance in mytor:
342

    
343
      if self.opts.iallocator:
344
        pnode = snode = None
345
        import_log_msg = ("- Import instance %s from node %s (iallocator: %s)" %
346
                          (instance, enode, self.opts.iallocator))
347
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
348
        snode = None
349
        import_log_msg = ("- Import instance %s from node %s to node %s" %
350
                          (instance, enode, pnode))
351
      else:
352
        import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
353
                          (instance, enode, pnode, snode))
354

    
355
      exp_op = opcodes.OpExportInstance(instance_name=instance,
356
                                           target_node=enode,
357
                                           shutdown=True)
358
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
359
                                        ignore_failures=True)
360
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
361
                                           names=[instance])
362
      full_name = self.ExecOp(nam_op)[0][0]
363
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
364
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
365
                                        disk_size=self.opts.os_size,
366
                                        swap_size=self.opts.swap_size,
367
                                        disk_template=self.opts.disk_template,
368
                                        mode=constants.INSTANCE_IMPORT,
369
                                        src_node=enode,
370
                                        src_path=imp_dir,
371
                                        pnode=pnode,
372
                                        snode=snode,
373
                                        start=True,
374
                                        ip_check=True,
375
                                        wait_for_sync=True,
376
                                        mac="auto",
377
                                        file_storage_dir=None,
378
                                        file_driver=None,
379
                                        iallocator=self.opts.iallocator,
380
                                        beparams=self.bep,
381
                                        hvparams=self.hvp,
382
                                        )
383

    
384
      erem_op = opcodes.OpRemoveExport(instance_name=instance)
385

    
386
      Log("- Export instance %s to node %s" % (instance, enode))
387
      self.ExecOp(exp_op)
388
      Log("- Remove instance %s" % (instance))
389
      self.ExecOp(rem_op)
390
      self.to_rem.remove(instance)
391
      Log(import_log_msg)
392
      self.ExecOp(imp_op)
393
      Log("- Remove export of instance %s" % (instance))
394
      self.ExecOp(erem_op)
395

    
396
      self.to_rem.append(instance)
397

    
398
  def StopInstance(self, instance):
399
    """Stop given instance."""
400
    op = opcodes.OpShutdownInstance(instance_name=instance)
401
    Log("- Shutdown instance %s" % instance)
402
    self.ExecOp(op)
403

    
404
  def StartInstance(self, instance):
405
    """Start given instance."""
406
    op = opcodes.OpStartupInstance(instance_name=instance, force=False)
407
    Log("- Start instance %s" % instance)
408
    self.ExecOp(op)
409

    
410
  def RenameInstance(self, instance, instance_new):
411
    """Rename instance."""
412
    op = opcodes.OpRenameInstance(instance_name=instance,
413
                                  new_name=instance_new)
414
    Log("- Rename instance %s to %s" % (instance, instance_new))
415
    self.ExecOp(op)
416

    
417
  def StopStart(self):
418
    """Stop/start the instances."""
419
    for instance in self.instances:
420
      self.StopInstance(instance)
421
      self.StartInstance(instance)
422

    
423
  def Remove(self):
424
    """Remove the instances."""
425
    for instance in self.to_rem:
426
      op = opcodes.OpRemoveInstance(instance_name=instance,
427
                                    ignore_failures=True)
428
      Log("- Remove instance %s" % instance)
429
      self.ExecOp(op)
430

    
431

    
432
  def Rename(self):
433
    """Rename the instances."""
434
    rename = self.opts.rename
435
    for instance in self.instances:
436
      self.StopInstance(instance)
437
      self.RenameInstance(instance, rename)
438
      self.StartInstance(rename)
439
      self.StopInstance(rename)
440
      self.RenameInstance(rename, instance)
441
      self.StartInstance(instance)
442

    
443
  def BurninCluster(self):
444
    """Test a cluster intensively.
445

    
446
    This will create instances and then start/stop/failover them.
447
    It is safe for existing instances but could impact performance.
448

    
449
    """
450

    
451
    opts = self.opts
452

    
453
    Log("- Testing global parameters")
454

    
455
    if (len(self.nodes) == 1 and
456
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
457
                                   constants.DT_FILE)):
458
      Log("When one node is available/selected the disk template must"
459
          " be 'diskless', 'file' or 'plain'")
460
      sys.exit(1)
461

    
462
    has_err = True
463
    try:
464
      self.CreateInstances()
465
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
466
        self.ReplaceDisks1D8()
467
      if (opts.do_replace2 and len(self.nodes) > 2 and
468
          opts.disk_template in constants.DTS_NET_MIRROR) :
469
        self.ReplaceDisks2()
470

    
471
      if opts.disk_template != constants.DT_DISKLESS:
472
        self.GrowDisks()
473

    
474
      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
475
        self.Failover()
476

    
477
      if opts.do_importexport:
478
        self.ImportExport()
479

    
480
      if opts.do_startstop:
481
        self.StopStart()
482

    
483
      if opts.rename:
484
        self.Rename()
485

    
486
      has_err = False
487
    finally:
488
      if has_err:
489
        Log("Error detected: opcode buffer follows:\n\n")
490
        Log(self.GetFeedbackBuf())
491
        Log("\n\n")
492
      self.Remove()
493

    
494
    return 0
495

    
496

    
497
def main():
  """Entry point: run one full burnin cycle and return its result."""
  return Burner().BurninCluster()
502

    
503

    
504
# Script entry point; main()'s return value is deliberately ignored,
# so the process exits with status 0 unless an exception escapes.
if __name__ == "__main__":
  main()