Statistics
| Branch: | Tag: | Revision:

root / tools / burnin @ 4501af56

History | View | Annotate | Download (10.8 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Burnin program
23

    
24
"""
25

    
26
import sys
27
import optparse
28
from itertools import izip, islice, cycle
29
from cStringIO import StringIO
30

    
31
from ganeti import opcodes
32
from ganeti import mcpu
33
from ganeti import constants
34
from ganeti import cli
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38

    
39

    
40
# One-line synopsis shown by Usage() and embedded in the optparse usage text.
USAGE = "\tburnin -o OS_NAME [options...] instance_name ..."
41

    
42

    
43
def Usage():
  """Write the usage banner to stderr and terminate with exit status 2.

  This function never returns: it always ends in sys.exit(2).

  """
  sys.stderr.write("Usage:\n")
  sys.stderr.write("%s\n" % USAGE)
  sys.exit(2)
49

    
50

    
51
def Log(msg):
  """Write msg and a trailing newline to stdout, then flush.

  The flush makes each message appear immediately instead of waiting in
  the stdout buffer.

  """
  # (msg,) keeps the formatting correct even when msg itself is a tuple
  sys.stdout.write("%s\n" % (msg,))
  sys.stdout.flush()
57

    
58

    
59
class Burner(object):
  """Drives one burn-in run against the cluster.

  Constructing a Burner parses the command line and queries the cluster
  (node names and OS availability); BurninCluster() then creates the
  requested instances, exercises them and removes them again.

  Attributes:
    proc: the mcpu.Processor used to execute every opcode
    nodes: names of the nodes used for the burn-in (filled by GetState)
    instances: instance names given on the command line
    to_rem: instances successfully created so far, i.e. the ones that
        Remove() has to delete
    opts: the parsed optparse options

  """

  def __init__(self):
    """Set up logging and the opcode processor, then load options/state.

    Both ParseOptions() and GetState() may terminate the program via
    sys.exit() when the command line or the cluster state is unusable.

    """
    logger.SetupLogging(debug=False, program="ganeti/burnin")
    self._feed_buf = StringIO()
    self.proc = mcpu.Processor(feedback=self.Feedback)
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.opts = None
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Discard everything accumulated in the feedback buffer."""
    # Rewind explicitly: not every buffer implementation moves the write
    # position back on truncate(), and writing past the end would pad.
    self._feed_buf.seek(0)
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the accumulated feedback as a single string."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate one feedback message in our buffer.

    The message is always stored (newline-terminated) so that it can be
    dumped if the burn-in fails; with --verbose it is also printed right
    away, which is what the option's help text promises.

    """
    self._feed_buf.write(msg)
    self._feed_buf.write("\n")
    # self.opts is still None until ParseOptions() has run
    if self.opts is not None and self.opts.verbose:
      Log(msg)

  def ExecOp(self, op):
    """Execute an opcode, starting from an empty feedback buffer.

    Returns whatever the processor returns for the opcode; exceptions
    raised by the processor are propagated unchanged.

    """
    self.ClearFeedbackBuf()
    return self.proc.ExecOpCode(op)

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    On success the parsed options are stored in self.opts and the
    positional arguments (the instance names) in self.instances.

    """
    # Single source of truth for the templates burnin can exercise; it is
    # used directly as the option's choices so that every supported
    # template (including the plain one, required for single-node runs)
    # can actually be selected.
    supported_disk_templates = (constants.DT_PLAIN, constants.DT_REMOTE_RAID1,
                                constants.DT_DRBD8)
    template_help = ("Disk template to use (one of: %s) [%s]" %
                     (", ".join(supported_disk_templates),
                      constants.DT_REMOTE_RAID1))

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--os-size", dest="os_size", help="Disk size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=supported_disk_templates,
                      default=constants.DT_REMOTE_RAID1,
                      help=template_help)
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")

    options, args = parser.parse_args()
    # At least one instance name and the OS are mandatory
    if not args or options.os is None:
      Usage()

    # No separate template validation is needed here: optparse already
    # rejects any value that is not in the option's choices.
    self.opts = options
    self.instances = args

  def GetState(self):
    """Query the cluster for the node list and check the requested OS.

    Fills self.nodes with the node names returned by the node query
    (restricted to the --nodes list when one was given). Exits the
    program if the query fails, if no OS information can be obtained, or
    if the requested OS is not valid on every node present in the OS
    diagnose result.

    """
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      # an empty name list is what gets sent when --nodes was not given
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError:
      # sys.exc_info() instead of a named except clause: this spelling
      # is accepted by every Python version
      err_code, msg = cli.FormatError(sys.exc_info()[1])
      Log(msg)
      sys.exit(err_code)
    # "name" is the only requested field, hence the first column
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS())

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # Intersect, over all nodes, the names of the valid OS-es; entries
    # that evaluate to false are the non-valid ones and are skipped
    os_set = None
    for node_name in result:
      valid_names = set([obj.name for obj in result[node_name] if obj])
      if os_set is None:
        os_set = valid_names
      else:
        os_set &= valid_names

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    Instance number i gets node i as primary and node i+1 as secondary,
    both wrapping around the node list. Every instance whose creation
    opcode succeeded is recorded in self.to_rem for later removal.

    """
    self.to_rem = []
    # Two views of the endlessly repeated node list, the second shifted
    # by one; izip stops as soon as the instance names run out.
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    for pnode, snode, instance in mytor:
      op = opcodes.OpCreateInstance(instance_name=instance,
                                    mem_size=128,
                                    disk_size=self.opts.os_size,
                                    swap_size=self.opts.swap_size,
                                    disk_template=self.opts.disk_template,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    vcpus=1,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    mac="auto",
                                    kernel_path=None,
                                    initrd_path=None)
      Log("- Add instance %s on node %s" % (instance, pnode))
      self.ExecOp(op)
      # only reached when the creation did not raise
      self.to_rem.append(instance)

  def ReplaceDisks1R1(self):
    """Replace disks with the same secondary for rr1."""
    # replace all, both disks
    for instance in self.instances:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  remote_node=None,
                                  mode=constants.REPLACE_DISK_ALL,
                                  disks=["sda", "sdb"])

      Log("- Replace disks for instance %s" % (instance))
      self.ExecOp(op)

  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    replace_modes = (constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI)
    for instance in self.instances:
      for mode in replace_modes:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=["sda", "sdb"])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace secondary node.

    The new secondary of instance number i is node i+2 (wrapping around
    the node list).

    """
    if self.opts.disk_template == constants.DT_REMOTE_RAID1:
      mode = constants.REPLACE_DISK_ALL
    else:
      mode = constants.REPLACE_DISK_SEC

    # node list repeated endlessly and shifted by two positions
    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  disks=["sda", "sdb"])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances."""
    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % (instance))
      self.ExecOp(op)

  def StopStart(self):
    """Stop/start the instances."""
    for instance in self.instances:
      op = opcodes.OpShutdownInstance(instance_name=instance)
      Log("- Shutdown instance %s" % instance)
      self.ExecOp(op)
      op = opcodes.OpStartupInstance(instance_name=instance, force=False)
      Log("- Start instance %s" % instance)
      self.ExecOp(op)

  def Remove(self):
    """Remove the instances recorded in self.to_rem."""
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    The created instances are removed at the end whether or not the run
    succeeded; on failure the feedback of the last opcode is printed
    first. Returns 0 on success; exits the program if only one node is
    in use with a disk template other than plain.

    """

    opts = self.opts

    Log("- Testing global parameters")

    if len(self.nodes) == 1 and opts.disk_template != constants.DT_PLAIN:
      Log("When one node is available/selected the disk template must"
          " be 'plain'")
      sys.exit(1)

    # stays True unless every step below completes
    has_err = True
    try:
      self.CreateInstances()

      # the replace/failover steps only apply to network-mirrored disks
      mirrored = opts.disk_template in constants.DTS_NET_MIRROR

      if opts.do_replace1 and mirrored:
        if opts.disk_template == constants.DT_REMOTE_RAID1:
          self.ReplaceDisks1R1()
        elif opts.disk_template == constants.DT_DRBD8:
          self.ReplaceDisks1D8()

      # moving the secondary needs a third node to move it to
      if opts.do_replace2 and len(self.nodes) > 2 and mirrored:
        self.ReplaceDisks2()

      if opts.do_failover and mirrored:
        self.Failover()

      self.StopStart()
      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0
319

    
320

    
321
def main():
  """Run the burn-in while holding the 'cmd' lock.

  Returns 1 if the lock cannot be acquired, otherwise the value returned
  by Burner.BurninCluster(). The lock is released and cleaned up even
  when the burn-in raises.

  """
  # NOTE(review): the Burner constructor already executes query opcodes
  # (via GetState) before the lock is taken below - confirm this is
  # intended.
  burner = Burner()

  try:
    utils.Lock('cmd', max_retries=15, debug=True)
  except errors.LockError:
    # fetch the active exception explicitly; this spelling is accepted
    # by every Python version
    logger.ToStderr(str(sys.exc_info()[1]))
    return 1

  try:
    return burner.BurninCluster()
  finally:
    utils.Unlock('cmd')
    utils.LockCleanup()
336

    
337

    
338
if __name__ == "__main__":
  # Hand main()'s return value to the shell; without this a failure to
  # acquire the lock (main() returning 1) would still exit with status 0.
  sys.exit(main())