#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

This tool exercises a Ganeti cluster by creating a set of test instances
and running a series of operations on them (disk replacement, failover,
export/import, stop/start and rename), removing them again at the end.

"""

import os
import sys
import optparse
import time
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import mcpu
from ganeti import constants
from ganeti import cli
from ganeti import logger
from ganeti import errors
from ganeti import utils


USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
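# Example invocation (the OS and instance names below are placeholders;
# use names that exist on your cluster):
#
#   burnin -o debian-etch -t drbd -p inst1.example.com inst2.example.com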


def Usage():
  """Shows program usage information and exits the program."""

  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)


def Log(msg):
  """Simple function that prints out its argument.

  """
  print msg
  sys.stdout.flush()


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    logger.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.opts = None
    self.cl = cli.GetClient()
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
    self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])),
                                      msg[2]))
    if self.opts.verbose:
      Log(msg)

  def ExecOp(self, op):
    """Execute an opcode and manage the exec buffer."""
    self.ClearFeedbackBuf()
    return cli.SubmitOpCode(op, feedback_fn=self.Feedback, cl=self.cl)

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
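    # All jobs are submitted to the master daemon up front and only then
    # polled one by one, so they can run concurrently while we wait for
    # their results in submission order.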
    job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
    Log("- Submitted job IDs %s" % ", ".join(job_ids))
    results = []
    for jid in job_ids:
      Log("- Waiting for job %s" % jid)
      results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))

    return results

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--os-size", dest="os_size", help="Disk size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--mem-size", dest="mem_size", help="Memory size",
                      default=128, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
                      default=True)
    parser.add_option("--rename", dest="rename", default=None,
                      help="Give one unused instance name which is taken"
                           " to start the renaming sequence",
                      metavar="<instance_name>")
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("diskless", "file", "plain", "drbd"),
                      default="drbd",
                      help="Disk template (diskless, file, plain or drbd)"
                           " [drbd]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")
    parser.add_option("--iallocator", dest="iallocator",
                      default=None, type="string",
                      help="Perform the allocation using an iallocator"
                      " instead of fixed node spread (node restrictions no"
                      " longer apply, therefore -n/--nodes must not be used)")
    parser.add_option("-p", "--parallel", default=False, action="store_true",
                      dest="parallel",
                      help="Enable parallelization of some operations in"
                      " order to speed up burnin or to test granular locking")

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    if options.nodes and options.iallocator:
      Log("Give either the nodes option or the iallocator option, not both")
      sys.exit(1)

    self.opts = options
    self.instances = args

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Log(msg)
      sys.exit(err_code)
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
                                              names=[]))

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # filter out the OSes that are not marked valid
    os_set = [val[0] for val in result if val[1]]

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
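    # Walk the node list round-robin: each instance gets the next node as
    # its primary and the node after that as its secondary, e.g. nodes
    # [n1, n2, n3] yield the pairs (n1, n2), (n2, n3), (n3, n1), ...
    # izip stops as soon as the (finite) instance list is exhausted.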
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    jobset = []
    for pnode, snode, instance in mytor:
      if self.opts.iallocator:
        pnode = snode = None
        Log("- Add instance %s (iallocator: %s)" %
              (instance, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        Log("- Add instance %s on node %s" % (instance, pnode))
      else:
        Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    mem_size=self.opts.mem_size,
                                    disk_size=self.opts.os_size,
                                    swap_size=self.opts.swap_size,
                                    disk_template=self.opts.disk_template,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    vcpus=1,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    mac="auto",
                                    kernel_path=None,
                                    initrd_path=None,
                                    hvm_boot_order=None,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    hvm_nic_type=constants.HT_HVM_NIC_RTL8139,
                                    hvm_disk_type=constants.HT_HVM_DEV_IOEMU)

      if self.opts.parallel:
        jobset.append([op])
        # FIXME: here we should not append to to_rem unconditionally,
        # but only when the job is successful
        self.to_rem.append(instance)
      else:
        self.ExecOp(op)
        self.to_rem.append(instance)
    if self.opts.parallel:
      self.ExecJobSet(jobset)

  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    for instance in self.instances:
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=["sda", "sdb"])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace secondary node."""
    mode = constants.REPLACE_DISK_SEC

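    # An offset of 2 into the node cycle picks, for each instance, a node
    # different from the primary/secondary pair chosen in CreateInstances;
    # BurninCluster only calls this when more than two nodes are available.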
    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      if self.opts.iallocator:
        tnode = None
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=["sda", "sdb"])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances."""

    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % (instance))
      self.ExecOp(op)

  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    """

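    # Pair each instance with primary, secondary and export nodes taken
    # from the same node cycle at offsets 0, 1 and 2; the export therefore
    # lands on a third node whenever the cluster has more than two nodes.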
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("- Import instance %s from node %s (iallocator: %s)" %
                          (instance, enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("- Import instance %s from node %s to node %s" %
                          (instance, enode, pnode))
      else:
        import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
                          (instance, enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance])
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        mem_size=128,
                                        disk_size=self.opts.os_size,
                                        swap_size=self.opts.swap_size,
                                        disk_template=self.opts.disk_template,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        vcpus=1,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        mac="auto",
                                        file_storage_dir=None,
                                        file_driver=None,
                                        iallocator=self.opts.iallocator,
                                        hvm_nic_type=
                                        constants.HT_HVM_NIC_RTL8139,
                                        hvm_disk_type=
                                        constants.HT_HVM_DEV_IOEMU)

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % (instance))
      self.ExecOp(rem_op)
      self.to_rem.remove(instance)
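      # While the instance exists only as an export it must not be on the
      # removal list; it is added back below, once the import has succeeded.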
      Log(import_log_msg)
      self.ExecOp(imp_op)
      Log("- Remove export of instance %s" % (instance))
      self.ExecOp(erem_op)

      self.to_rem.append(instance)

  def StopInstance(self, instance):
    """Stop given instance."""
    op = opcodes.OpShutdownInstance(instance_name=instance)
    Log("- Shutdown instance %s" % instance)
    self.ExecOp(op)

  def StartInstance(self, instance):
    """Start given instance."""
    op = opcodes.OpStartupInstance(instance_name=instance, force=False)
    Log("- Start instance %s" % instance)
    self.ExecOp(op)

  def RenameInstance(self, instance, instance_new):
    """Rename instance."""
    op = opcodes.OpRenameInstance(instance_name=instance,
                                  new_name=instance_new)
    Log("- Rename instance %s to %s" % (instance, instance_new))
    self.ExecOp(op)

  def StopStart(self):
    """Stop/start the instances."""
    for instance in self.instances:
      self.StopInstance(instance)
      self.StartInstance(instance)

  def Remove(self):
    """Remove the instances."""
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)


  def Rename(self):
    """Rename the instances."""
    rename = self.opts.rename
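    # Rename each instance to the spare name and back again, so every
    # instance ends up with its original name and the spare name stays free.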
    for instance in self.instances:
      self.StopInstance(instance)
      self.RenameInstance(instance, rename)
      self.StartInstance(rename)
      self.StopInstance(rename)
      self.RenameInstance(rename, instance)
      self.StartInstance(instance)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("- Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Log("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")
      sys.exit(1)

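    # has_err stays True unless the whole try block completes; on error the
    # finally clause dumps the buffered opcode feedback before the test
    # instances are removed.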
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.ReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.ReplaceDisks2()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.Failover()

      if opts.do_importexport:
        self.ImportExport()

      if opts.do_startstop:
        self.StopStart()

      if opts.rename:
        self.Rename()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0


def main():
  """Main function"""

  burner = Burner()
  return burner.BurninCluster()


if __name__ == "__main__":
  main()