Statistics
| Branch: | Tag: | Revision:

root / tools / burnin @ 4aa036ab

History | View | Annotate | Download (10.9 kB)

#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""
25

    
26
import sys
27
import optparse
28
from itertools import izip, islice, cycle
29
from cStringIO import StringIO
30

    
31
from ganeti import opcodes
32
from ganeti import mcpu
33
from ganeti import constants
34
from ganeti import cli
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38

    
39

    
40
# One-line synopsis, shown by Usage() and passed to optparse as usage text.
USAGE = "\tburnin -o OS_NAME [options...] instance_name ..."
41

    
42

    
43
def Usage():
  """Print the usage text on stderr and leave the program with status 2."""
  # a single write emits the same two lines the header/synopsis pair needs
  sys.stderr.write("Usage:\n%s\n" % USAGE)
  sys.exit(2)
49

    
50

    
51
def Log(msg):
  """Print msg on standard output and flush the stream right away.

  The flush makes each message show up immediately instead of waiting
  in the stdout buffer.

  """
  stream = sys.stdout
  print >> stream, msg
  stream.flush()
57

    
58

    
59
class Burner(object):
  """Driver that exercises a cluster with throw-away instances.

  Creating a Burner parses the command line and queries the cluster
  (node names and OS availability); BurninCluster() then runs the
  create / replace-disks / failover / stop-start / remove sequence.

  """

  def __init__(self):
    """Set up logging and the opcode processor, then load the state.

    This parses sys.argv and executes query opcodes, so it may end the
    program through sys.exit().

    """
    logger.SetupLogging(debug=False, program="ganeti/burnin")
    # feedback of the opcode currently running; dumped when a step fails
    self._feed_buf = StringIO()
    self.proc = mcpu.Processor(feedback=self.Feedback)
    self.nodes = []
    self.instances = []
    # instances that were really created, i.e. what Remove() must delete
    self.to_rem = []
    self.opts = None
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Empty the feedback buffer.

    The buffer is rewound before being truncated so that the next write
    starts at offset zero whatever StringIO implementation is in use.

    """
    self._feed_buf.seek(0)
    self._feed_buf.truncate()

  def GetFeedbackBuf(self):
    """Return everything accumulated in the feedback buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate an opcode feedback message in our buffer.

    With -v/--verbose the message is additionally printed right away.

    """
    self._feed_buf.write(msg)
    self._feed_buf.write("\n")
    # opts is still None until ParseOptions() has run
    if self.opts is not None and self.opts.verbose:
      Log(msg)

  def ExecOp(self, op):
    """Execute an opcode and return the processor's result.

    The feedback buffer is cleared first, so after a failure it holds
    only the messages of the opcode that failed.

    """
    self.ClearFeedbackBuf()
    return self.proc.ExecOpCode(op)

  def ParseOptions(self):
    """Parse the command line into self.opts and self.instances.

    Shows the usage and exits when no instance name or no OS was given;
    exits with status 1 for a disk template burnin does not handle.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--os-size", dest="os_size", help="Disk size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    # the help text lists every accepted choice, including "plain"
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("plain", "remote_raid1", "drbd"),
                      default="remote_raid1",
                      help="Disk template (plain, remote_raid1 or drbd)"
                      " [remote_raid1]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    # cross-check the literal choices above against the ganeti constants
    supported_disk_templates = (constants.DT_PLAIN, constants.DT_REMOTE_RAID1,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    self.opts = options
    self.instances = args

  def GetState(self):
    """Read the node list and check that the requested OS is usable.

    Fills self.nodes with the names returned by the node query (limited
    to --nodes when given) and exits unless the OS chosen with -o is
    reported as valid for every node in the OS diagnose result.

    """
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []

    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError:
      # sys.exc_info() hands us the exception being handled
      err_code, msg = cli.FormatError(sys.exc_info()[1])
      Log(msg)
      sys.exit(err_code)
    # each row holds only the requested "name" field
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS())

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # intersect the valid OS names over all nodes; false entries are the
    # non-valid OS-es and get dropped
    os_set = None
    for node_name in result:
      valid_names = set([os_inst.name for os_inst in result[node_name]
                         if os_inst])
      if os_set is None:
        os_set = valid_names
      else:
        os_set &= valid_names

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    Primary nodes are taken round-robin from self.nodes and the
    secondary is always the following node in that rotation. Every
    instance created is recorded in self.to_rem for the later cleanup.

    """
    self.to_rem = []
    primaries = cycle(self.nodes)
    # same rotation shifted by one, so the secondary follows the primary
    secondaries = islice(cycle(self.nodes), 1, None)
    for pnode, snode, instance in izip(primaries, secondaries,
                                       self.instances):
      op = opcodes.OpCreateInstance(instance_name=instance,
                                    mem_size=128,
                                    disk_size=self.opts.os_size,
                                    swap_size=self.opts.swap_size,
                                    disk_template=self.opts.disk_template,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    vcpus=1,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    mac="auto",
                                    kernel_path=None,
                                    initrd_path=None,
                                    hvm_boot_order=None)
      Log("- Add instance %s on node %s" % (instance, pnode))
      self.ExecOp(op)
      self.to_rem.append(instance)

  def ReplaceDisks1R1(self):
    """Replace both disks, keeping the same secondary (remote_raid1)."""
    for instance in self.instances:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  remote_node=None,
                                  mode=constants.REPLACE_DISK_ALL,
                                  disks=["sda", "sdb"])
      Log("- Replace disks for instance %s" % instance)
      self.ExecOp(op)

  def ReplaceDisks1D8(self):
    """Replace disks on the secondary and then the primary (drbd8)."""
    modes = (constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI)
    for instance in self.instances:
      for mode in modes:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=["sda", "sdb"])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Move the secondary of each instance to another node.

    The target is the node two places after the instance's primary in
    the rotation used by CreateInstances, i.e. the one following its
    current secondary.

    """
    if self.opts.disk_template == constants.DT_REMOTE_RAID1:
      mode = constants.REPLACE_DISK_ALL
    else:
      mode = constants.REPLACE_DISK_SEC

    targets = islice(cycle(self.nodes), 2, None)
    for tnode, instance in izip(targets, self.instances):
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  disks=["sda", "sdb"])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances."""
    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)
      Log("- Failover instance %s" % instance)
      self.ExecOp(op)

  def StopStart(self):
    """Shut down and start again each instance, one after the other."""
    for instance in self.instances:
      op = opcodes.OpShutdownInstance(instance_name=instance)
      Log("- Shutdown instance %s" % instance)
      self.ExecOp(op)
      op = opcodes.OpStartupInstance(instance_name=instance, force=False)
      Log("- Start instance %s" % instance)
      self.ExecOp(op)

  def Remove(self):
    """Remove the instances recorded by CreateInstances."""
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    Returns 0 when the whole sequence ran. If a step raises, the
    feedback of the failing opcode is printed, the instances created so
    far are removed and the exception continues to propagate.

    """
    opts = self.opts

    Log("- Testing global parameters")

    if len(self.nodes) == 1 and opts.disk_template != constants.DT_PLAIN:
      Log("When one node is available/selected the disk template must"
          " be 'plain'")
      sys.exit(1)

    net_mirrored = opts.disk_template in constants.DTS_NET_MIRROR

    # stays True unless every step below completes
    has_err = True
    try:
      self.CreateInstances()

      if opts.do_replace1 and net_mirrored:
        if opts.disk_template == constants.DT_REMOTE_RAID1:
          self.ReplaceDisks1R1()
        elif opts.disk_template == constants.DT_DRBD8:
          self.ReplaceDisks1D8()

      # a different secondary needs a third node to move to
      if opts.do_replace2 and len(self.nodes) > 2 and net_mirrored:
        self.ReplaceDisks2()

      if opts.do_failover and net_mirrored:
        self.Failover()

      self.StopStart()
      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      # clean up whatever was created, on success and on failure alike
      self.Remove()

    return 0
320

    
321

    
322
def main():
  """Run the burnin while holding the 'cmd' lock.

  Returns what Burner.BurninCluster() returns, or 1 when the lock
  cannot be obtained.

  """
  # NOTE(review): the constructor already executes query opcodes, i.e.
  # before the lock is taken - confirm this is intended
  burner = Burner()

  try:
    utils.Lock('cmd', max_retries=15, debug=True)
  except errors.LockError:
    # report the lock failure being handled and give up
    logger.ToStderr(str(sys.exc_info()[1]))
    return 1

  try:
    return burner.BurninCluster()
  finally:
    # runs on normal return as well as when the burnin raises
    utils.Unlock('cmd')
    utils.LockCleanup()
337

    
338

    
339
if __name__ == "__main__":
  # main() returns the exit status (1 when the lock cannot be taken,
  # otherwise the burnin result); pass it on instead of dropping it
  sys.exit(main())