Statistics
| Branch: | Tag: | Revision:

root / tools / burnin @ bd5e77f9

History | View | Annotate | Download (13.1 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Burnin program
23

    
24
"""
25

    
26
import os
27
import sys
28
import optparse
29
from itertools import izip, islice, cycle
30
from cStringIO import StringIO
31

    
32
from ganeti import opcodes
33
from ganeti import mcpu
34
from ganeti import constants
35
from ganeti import cli
36
from ganeti import logger
37
from ganeti import errors
38
from ganeti import utils
39

    
40

    
41
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
42

    
43

    
44
def Usage():
  """Print the usage text on standard error and terminate the program.

  The header line "Usage:" is followed by the module-level USAGE string.
  The process then exits with status 2, so this function never returns.

  """
  err_stream = sys.stderr
  err_stream.write("Usage:\n")
  err_stream.write("%s\n" % (USAGE,))
  sys.exit(2)
50

    
51

    
52
def Log(msg):
  """Write msg followed by a newline to standard output.

  The stream is flushed right away so that progress lines show up
  immediately even when stdout is redirected to a pipe or a file.

  """
  out = sys.stdout
  out.write("%s\n" % (msg,))
  out.flush()
58

    
59

    
60
class Burner(object):
  """Burner class.

  Creating an instance parses the command line and queries the cluster
  for its node list and OS definitions (either step may exit the
  program). BurninCluster() then creates the requested instances, runs
  the selected tests on them and removes them again.

  """

  def __init__(self):
    """Constructor.

    Sets up logging and the opcode processor, then parses the command
    line and reads the cluster state.

    """
    logger.SetupLogging(debug=False, program="ganeti/burnin")
    # feedback messages of the opcode currently being executed
    self._feed_buf = StringIO()
    self.proc = mcpu.Processor(feedback=self.Feedback)
    # node names the burnin runs on (filled in by GetState)
    self.nodes = []
    # instance names given on the command line (filled in by ParseOptions)
    self.instances = []
    # instances which currently exist and must be removed at the end
    self.to_rem = []
    # parsed command line options (filled in by ParseOptions)
    self.opts = None
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)
    # Not every StringIO implementation rewinds on truncate(); without an
    # explicit seek the next write could land past the end of the buffer
    # and be padded with NUL characters.
    self._feed_buf.seek(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer.

    The message is written to the buffer as-is, followed by a newline.
    If the --verbose option was given, the message is also printed
    immediately via Log().

    """
    self._feed_buf.write(msg)
    self._feed_buf.write("\n")
    # opts is None until ParseOptions() has run
    opts = getattr(self, "opts", None)
    if getattr(opts, "verbose", False):
      Log(msg)

  def ExecOp(self, op):
    """Execute an opcode and manage the exec buffer.

    The feedback buffer is emptied first, so after the call (or after a
    failure) it only holds the messages produced by this opcode.

    Returns whatever the processor returns for the opcode.

    """
    self.ClearFeedbackBuf()
    return self.proc.ExecOpCode(op)

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    On success the parsed options are stored in self.opts and the
    positional arguments (the instance names) in self.instances.

    """

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--os-size", dest="os_size", help="Disk size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    # "plain" is an accepted choice too (and the only one usable with a
    # single node), so the help text lists all three templates
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("plain", "remote_raid1", "drbd"),
                      default="remote_raid1",
                      help="Disk template to use (plain, remote_raid1"
                      " or drbd) [remote_raid1]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")

    options, args = parser.parse_args()
    # at least one instance name and the OS are mandatory
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_PLAIN, constants.DT_REMOTE_RAID1,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    self.opts = options
    self.instances = args

  def GetState(self):
    """Read the cluster state from the config.

    Fills self.nodes with the node names returned by the node query
    (restricted to the --nodes list, if given) and verifies that the
    requested OS is reported as valid for every node in the OS
    diagnose result. Exits the program if the node query fails, if no
    OS list can be obtained or if the OS is not found.

    """
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError:
      err_code, msg = cli.FormatError(sys.exc_info()[1])
      Log(msg)
      sys.exit(err_code)
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS())

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # intersect, over all nodes, the names of the valid (true-valued) OS
    # objects; os_set stays None only until the first node is processed
    os_set = None
    for node_name in result:
      valid_names = set([os_inst.name for os_inst in result[node_name]
                         if os_inst])
      if os_set is None:
        os_set = valid_names
      else:
        os_set &= valid_names

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    Primary nodes are assigned round-robin over self.nodes; the
    secondary node is the next one in the same cycle. Every created
    instance is recorded in self.to_rem for later removal.

    """
    self.to_rem = []
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    for pnode, snode, instance in mytor:
      op = opcodes.OpCreateInstance(instance_name=instance,
                                    mem_size=128,
                                    disk_size=self.opts.os_size,
                                    swap_size=self.opts.swap_size,
                                    disk_template=self.opts.disk_template,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    vcpus=1,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    mac="auto",
                                    kernel_path=None,
                                    initrd_path=None,
                                    hvm_boot_order=None)
      Log("- Add instance %s on node %s" % (instance, pnode))
      self.ExecOp(op)
      self.to_rem.append(instance)

  def ReplaceDisks1R1(self):
    """Replace disks with the same secondary for rr1."""
    # replace all, both disks
    for instance in self.instances:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  remote_node=None,
                                  mode=constants.REPLACE_DISK_ALL,
                                  disks=["sda", "sdb"])

      Log("- Replace disks for instance %s" % (instance))
      self.ExecOp(op)

  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8.

    For every instance the secondary-side replacement is run first,
    then the primary-side one.

    """
    for instance in self.instances:
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=["sda", "sdb"])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace secondary node.

    The new secondary is taken two positions ahead in the node cycle,
    i.e. the node following the one CreateInstances() used as secondary.

    """
    if self.opts.disk_template == constants.DT_REMOTE_RAID1:
      mode = constants.REPLACE_DISK_ALL
    else:
      mode = constants.REPLACE_DISK_SEC

    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  disks=["sda", "sdb"])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances."""

    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % (instance))
      self.ExecOp(op)

  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    The export target is the node two positions ahead in the node
    cycle; the instance is re-imported with the same primary/secondary
    assignment that CreateInstances() uses. self.to_rem is kept in sync,
    so an instance that was removed but not yet re-imported is not
    scheduled for removal.

    """

    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance)
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance])
      # the export directory is named after the name reported by the
      # instance query, not the name given on the command line
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        mem_size=128,
                                        disk_size=self.opts.os_size,
                                        swap_size=self.opts.swap_size,
                                        disk_template=self.opts.disk_template,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        vcpus=1,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        mac="auto")

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % (instance))
      self.ExecOp(rem_op)
      self.to_rem.remove(instance)
      Log("- Import instance %s from node %s to node %s" %
          (instance, enode, pnode))
      self.ExecOp(imp_op)
      self.to_rem.append(instance)

  def StopStart(self):
    """Stop/start the instances."""
    for instance in self.instances:
      op = opcodes.OpShutdownInstance(instance_name=instance)
      Log("- Shutdown instance %s" % instance)
      self.ExecOp(op)
      op = opcodes.OpStartupInstance(instance_name=instance, force=False)
      Log("- Start instance %s" % instance)
      self.ExecOp(op)

  def Remove(self):
    """Remove the instances.

    Only the instances recorded in self.to_rem are removed.

    """
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    Returns 0 when all steps complete. If a step raises, the feedback
    buffer of the failing opcode is printed, the instances created so
    far are removed and the exception is propagated to the caller.

    """

    opts = self.opts

    Log("- Testing global parameters")

    if len(self.nodes) == 1 and opts.disk_template != constants.DT_PLAIN:
      Log("When one node is available/selected the disk template must"
               " be 'plain'")
      sys.exit(1)

    # stays True unless every step below runs to completion
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        if opts.disk_template == constants.DT_REMOTE_RAID1:
          self.ReplaceDisks1R1()
        elif opts.disk_template == constants.DT_DRBD8:
          self.ReplaceDisks1D8()
      # changing the secondary needs a third node to move to
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.ReplaceDisks2()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.Failover()

      if opts.do_importexport:
        self.ImportExport()

      self.StopStart()
      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0
372

    
373

    
374
def main():
  """Main function

  Builds the Burner (which parses the command line and reads the
  cluster state), takes the 'cmd' lock and runs the burnin. The lock is
  released and cleaned up even if the burnin raises.

  Returns 1 if the lock cannot be acquired, otherwise the result of
  Burner.BurninCluster().

  """

  burner = Burner()
  try:
    utils.Lock('cmd', max_retries=15, debug=True)
  except errors.LockError:
    logger.ToStderr(str(sys.exc_info()[1]))
    return 1
  try:
    return burner.BurninCluster()
  finally:
    utils.Unlock('cmd')
    utils.LockCleanup()
389

    
390

    
391
if __name__ == "__main__":
  # main() returns the process status (1 when the command lock cannot be
  # taken, otherwise the burnin result); hand it to the shell instead of
  # discarding it
  sys.exit(main())