
tools/burnin @ d4844f0f


#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""

import os
import sys
import optparse
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import mcpu
from ganeti import constants
from ganeti import cli
from ganeti import logger
from ganeti import errors
from ganeti import utils


USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")


def Usage():
  """Shows program usage information and exits the program."""

  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)


def Log(msg):
  """Simple function that prints out its argument.

  """
  print msg
  sys.stdout.flush()


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    logger.SetupLogging(debug=False, program="ganeti/burnin")
    self._feed_buf = StringIO()
    self.proc = mcpu.Processor(feedback=self.Feedback)
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.opts = None
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
    self._feed_buf.write(msg)
    self._feed_buf.write("\n")

  def ExecOp(self, op):
    """Execute an opcode and manage the exec buffer."""
    self.ClearFeedbackBuf()
    return self.proc.ExecOpCode(op)

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--os-size", dest="os_size", help="Disk size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
                      default=True)
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("plain", "remote_raid1", "drbd"),
                      default="remote_raid1",
                      help="Template type for network mirroring (remote_raid1"
                      " or drbd) [remote_raid1]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_PLAIN, constants.DT_REMOTE_RAID1,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    self.opts = options
    self.instances = args

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Log(msg)
      sys.exit(err_code)
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS())

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # filter out the invalid OSes
    oses = {}
    for node_name in result:
      oses[node_name] = [obj for obj in result[node_name] if obj]
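
    # Compute the set of OS names available on every node; the requested OS
    # must be in that intersection.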
    fnode = oses.keys()[0]
    os_set = set([os_inst.name for os_inst in oses[fnode]])
    del oses[fnode]
    for node in oses:
      os_set &= set([os_inst.name for os_inst in oses[node]])

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
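    # Walk the node list cyclically: each instance gets a primary node and the
    # next node in the cycle as its secondary (izip stops once the instance
    # list is exhausted).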
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    for pnode, snode, instance in mytor:
      op = opcodes.OpCreateInstance(instance_name=instance,
                                    mem_size=128,
                                    disk_size=self.opts.os_size,
                                    swap_size=self.opts.swap_size,
                                    disk_template=self.opts.disk_template,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    vcpus=1,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    mac="auto",
                                    kernel_path=None,
                                    initrd_path=None,
                                    hvm_boot_order=None)
      Log("- Add instance %s on node %s" % (instance, pnode))
      self.ExecOp(op)
      self.to_rem.append(instance)

  def ReplaceDisks1R1(self):
    """Replace disks with the same secondary for rr1."""
    # replace all, both disks
    for instance in self.instances:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  remote_node=None,
                                  mode=constants.REPLACE_DISK_ALL,
                                  disks=["sda", "sdb"])

      Log("- Replace disks for instance %s" % (instance))
      self.ExecOp(op)

  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    for instance in self.instances:
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=["sda", "sdb"])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace secondary node."""
    if self.opts.disk_template == constants.DT_REMOTE_RAID1:
      mode = constants.REPLACE_DISK_ALL
    else:
      mode = constants.REPLACE_DISK_SEC
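
    # The new secondary is the node two steps ahead in the cycled node list,
    # i.e. distinct from the two nodes the instance was created on (this step
    # is only run when more than two nodes are available).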
    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  disks=["sda", "sdb"])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances."""

    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % (instance))
      self.ExecOp(op)

  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance)
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance])
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        mem_size=128,
                                        disk_size=self.opts.os_size,
                                        swap_size=self.opts.swap_size,
                                        disk_template=self.opts.disk_template,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        vcpus=1,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        mac="auto")

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % (instance))
      self.ExecOp(rem_op)
      self.to_rem.remove(instance)
      Log("- Import instance %s from node %s to node %s" %
          (instance, enode, pnode))
      self.ExecOp(imp_op)
      self.to_rem.append(instance)

  def StopStart(self):
    """Stop/start the instances."""
    for instance in self.instances:
      op = opcodes.OpShutdownInstance(instance_name=instance)
      Log("- Shutdown instance %s" % instance)
      self.ExecOp(op)
      op = opcodes.OpStartupInstance(instance_name=instance, force=False)
      Log("- Start instance %s" % instance)
      self.ExecOp(op)

  def Remove(self):
    """Remove the instances."""
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("- Testing global parameters")

    if len(self.nodes) == 1 and opts.disk_template != constants.DT_PLAIN:
      Log("When one node is available/selected the disk template must"
          " be 'plain'")
      sys.exit(1)
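
    # Assume failure until all steps have completed: if any opcode raises, the
    # finally clause below dumps the accumulated feedback buffer before the
    # test instances are removed.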
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        if opts.disk_template == constants.DT_REMOTE_RAID1:
          self.ReplaceDisks1R1()
        elif opts.disk_template == constants.DT_DRBD8:
          self.ReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.ReplaceDisks2()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.Failover()

      if opts.do_importexport:
        self.ImportExport()

      if opts.do_startstop:
        self.StopStart()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0


def main():
  """Main function"""

  burner = Burner()
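  # Take the ganeti 'cmd' lock so that burnin is serialized with other
  # commands using the same lock; it is released in the finally block below.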
  try:
    utils.Lock('cmd', max_retries=15, debug=True)
  except errors.LockError, err:
    logger.ToStderr(str(err))
    return 1
  try:
    retval = burner.BurninCluster()
  finally:
    utils.Unlock('cmd')
    utils.LockCleanup()
  return retval


if __name__ == "__main__":
  main()