Statistics
| Branch: | Tag: | Revision:

root / tools / burnin @ d7b47a77

History | View | Annotate | Download (13.5 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Burnin program
23

    
24
"""
25

    
26
import os
27
import sys
28
import optparse
29
from itertools import izip, islice, cycle
30
from cStringIO import StringIO
31

    
32
from ganeti import opcodes
33
from ganeti import mcpu
34
from ganeti import constants
35
from ganeti import cli
36
from ganeti import logger
37
from ganeti import errors
38
from ganeti import utils
39

    
40

    
41
# One-line usage synopsis, shown both by Usage() and as the optparse
# usage header in ParseOptions().
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
42

    
43

    
44
def Usage():
  """Print a usage message to stderr and exit the program.

  Exits with status 2, the conventional code for command-line errors.
  """
  sys.stderr.write("Usage:\n")
  sys.stderr.write(USAGE + "\n")
  sys.exit(2)
50

    
51

    
52
def Log(msg):
  """Write msg (and a trailing newline) to stdout, flushing immediately.

  Flushing keeps the burnin progress visible in real time even when
  stdout is not a terminal.
  """
  sys.stdout.write("%s\n" % (msg,))
  sys.stdout.flush()
58

    
59

    
60
class Burner(object):
  """Drives an intensive test ("burnin") of a Ganeti cluster.

  On construction the burner sets up logging, parses the command line
  (which may exit the program via Usage()) and queries the cluster for
  its node list and the set of OSes valid on every node.  The actual
  test sequence is run by BurninCluster().
  """

  def __init__(self):
    """Constructor.

    Side effects: configures logging, parses sys.argv and queries the
    cluster state; may call sys.exit() on bad options or RPC errors.
    """
    logger.SetupLogging(debug=False, program="ganeti/burnin")
    self._feed_buf = StringIO()  # accumulated opcode feedback messages
    self.proc = mcpu.Processor(feedback=self.Feedback)
    self.nodes = []      # node names to burn in on, filled by GetState()
    self.instances = []  # instance names, filled by ParseOptions()
    self.to_rem = []     # instances we created and must remove on exit
    self.opts = None     # parsed options, set by ParseOptions()
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    # NOTE(review): relies on truncate(0) also rewinding the write
    # position (cStringIO moves the position back when it lies beyond
    # the new size) -- confirm if ever ported to another buffer type.
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the feedback buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer, echoing it in verbose mode."""
    self._feed_buf.write(msg)
    self._feed_buf.write("\n")
    if self.opts.verbose:
      Log(msg)

  def ExecOp(self, op):
    """Execute an opcode and manage the exec (feedback) buffer.

    The buffer is cleared first, so that after a failure it contains
    only the feedback of the opcode that failed (this is what
    BurninCluster's finally clause dumps on error).
    """
    self.ClearFeedbackBuf()
    return self.proc.ExecOpCode(op)

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    # "unit" is a cli.CliOption type: sizes accept unit suffixes
    parser.add_option("--os-size", dest="os_size", help="Disk size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    # the --no-* options default each test phase to enabled
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
                      default=True)
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("diskless", "plain", "remote_raid1", "drbd"),
                      default="remote_raid1",
                      help="Template type for network mirroring (remote_raid1"
                      " or drbd) [remote_raid1]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")

    options, args = parser.parse_args()
    # at least one instance name and an OS are mandatory
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS, constants.DT_PLAIN,
                                constants.DT_REMOTE_RAID1,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    self.opts = options
    self.instances = args

  def GetState(self):
    """Read the cluster state from the config.

    Fills self.nodes and validates that self.opts.os is available on
    every selected node; exits the program on query errors or if the
    OS is missing somewhere.
    """
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []  # an empty name list queries all nodes
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Log(msg)
      sys.exit(err_code)
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS())

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # filter non-valid OS-es
    oses = {}
    for node_name in result:
      oses[node_name] = [obj for obj in result[node_name] if obj]

    # intersect the OS names valid on every node; keys()[0] works since
    # Python 2 dict.keys() returns a list
    fnode = oses.keys()[0]
    os_set = set([os_inst.name for os_inst in oses[fnode]])
    del oses[fnode]
    for node in oses:
      os_set &= set([os_inst.name for os_inst in oses[node]])

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    Primary/secondary nodes are assigned round-robin over self.nodes:
    instance i gets the i-th node of the cycle as primary and the next
    one as secondary.  Every created instance is recorded in
    self.to_rem so Remove() can clean it up later.
    """
    self.to_rem = []
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    for pnode, snode, instance in mytor:
      op = opcodes.OpCreateInstance(instance_name=instance,
                                    mem_size=128,
                                    disk_size=self.opts.os_size,
                                    swap_size=self.opts.swap_size,
                                    disk_template=self.opts.disk_template,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    vcpus=1,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    mac="auto",
                                    kernel_path=None,
                                    initrd_path=None,
                                    hvm_boot_order=None)
      Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))
      self.ExecOp(op)
      self.to_rem.append(instance)

  def ReplaceDisks1R1(self):
    """Replace disks, keeping the same secondary, for remote_raid1."""
    # replace all, both disks
    for instance in self.instances:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  remote_node=None,
                                  mode=constants.REPLACE_DISK_ALL,
                                  disks=["sda", "sdb"])

      Log("- Replace disks for instance %s" % (instance))
      self.ExecOp(op)

  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    for instance in self.instances:
      # secondary first, then primary, keeping the node pair unchanged
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=["sda", "sdb"])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace the secondary node with a different one."""
    if self.opts.disk_template == constants.DT_REMOTE_RAID1:
      mode = constants.REPLACE_DISK_ALL
    else:
      mode = constants.REPLACE_DISK_SEC

    # the target node is offset by two in the node cycle, so it differs
    # from both the primary and the current secondary chosen in
    # CreateInstances()
    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  disks=["sda", "sdb"])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances (to their secondary nodes)."""

    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % (instance))
      self.ExecOp(op)

  def ImportExport(self):
    """Export each instance, delete it, and import it back.

    The export node is the third node in the cycle, distinct from the
    primary/secondary pair chosen in CreateInstances().  While an
    instance is deleted it is taken off self.to_rem, so if the import
    step fails the final cleanup will not try to remove a
    no-longer-existing instance.
    """

    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                           target_node=enode,
                                           shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance)
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                           names=[instance])
      # the export directory is named after the instance's full name,
      # so query it before removing the instance
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        mem_size=128,
                                        disk_size=self.opts.os_size,
                                        swap_size=self.opts.swap_size,
                                        disk_template=self.opts.disk_template,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        vcpus=1,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        mac="auto")

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % (instance))
      self.ExecOp(rem_op)
      self.to_rem.remove(instance)
      Log("- Import instance %s from node %s to node %s" %
          (instance, enode, pnode))
      self.ExecOp(imp_op)
      self.to_rem.append(instance)

  def StopStart(self):
    """Stop/start the instances."""
    for instance in self.instances:
      op = opcodes.OpShutdownInstance(instance_name=instance)
      Log("- Shutdown instance %s" % instance)
      self.ExecOp(op)
      op = opcodes.OpStartupInstance(instance_name=instance, force=False)
      Log("- Start instance %s" % instance)
      self.ExecOp(op)

  def Remove(self):
    """Remove the instances created by this burnin run."""
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    Returns 0 on success; the created instances are always removed in
    the finally clause, and on error the feedback buffer of the failed
    opcode is dumped first.
    """

    opts = self.opts

    Log("- Testing global parameters")

    # network-mirrored templates need at least two nodes
    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN)):
      Log("When one node is available/selected the disk template must"
               " be 'plain' or 'diskless'")
      sys.exit(1)

    # has_err is flipped to False only if the whole try body completes,
    # so the finally clause knows whether to dump the feedback buffer
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        if opts.disk_template == constants.DT_REMOTE_RAID1:
          self.ReplaceDisks1R1()
        elif opts.disk_template == constants.DT_DRBD8:
          self.ReplaceDisks1D8()
      # secondary replacement needs a third node to move to
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR) :
        self.ReplaceDisks2()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.Failover()

      if opts.do_importexport:
        self.ImportExport()

      if opts.do_startstop:
        self.StopStart()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0
381

    
382

    
383
def main():
  """Main function.

  Acquires the cluster command lock, runs the burnin, and always
  releases the lock afterwards.  Returns 1 if the lock cannot be
  acquired, otherwise BurninCluster()'s return value.
  """

  burner = Burner()
  try:
    # serialize against other cluster commands; give up after 15 retries
    utils.Lock('cmd', max_retries=15, debug=True)
  except errors.LockError, err:
    logger.ToStderr(str(err))
    return 1
  try:
    retval = burner.BurninCluster()
  finally:
    utils.Unlock('cmd')
    utils.LockCleanup()
  return retval
398

    
399

    
400
if __name__ == "__main__":
  # Propagate main()'s return value as the process exit status.  A bare
  # main() call discards it, making the script exit 0 even when the
  # burnin failed or the cluster lock could not be acquired.
  sys.exit(main())