Statistics
| Branch: | Tag: | Revision:

root / tools / burnin @ 6e06b36c

History | View | Annotate | Download (10.7 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Burnin program
23

    
24
"""
25

    
26
import sys
27
import optparse
28
from itertools import izip, islice, cycle
29
from cStringIO import StringIO
30

    
31
from ganeti import opcodes
32
from ganeti import mcpu
33
from ganeti import constants
34
from ganeti import cli
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38

    
39

    
40
# One-line synopsis shown by Usage() and embedded in the optparse usage text.
USAGE = "\tburnin -o OS_NAME [options...] instance_name ..."
41

    
42

    
43
def Usage():
  """Print the usage text on stderr and terminate with exit code 2.

  """
  for line in ("Usage:", USAGE):
    print >> sys.stderr, line
  sys.exit(2)
49

    
50

    
51
def Log(msg):
  """Print msg on standard output and flush the stream right away.

  """
  out = sys.stdout
  print >> out, msg
  out.flush()
57

    
58

    
59
class Burner(object):
60
  """Burner class."""
61

    
62
  def __init__(self):
63
    """Constructor."""
64
    logger.SetupLogging(debug=False, program="ganeti/burnin")
65
    self._feed_buf = StringIO()
66
    self.proc = mcpu.Processor(feedback=self.Feedback)
67
    self.nodes = []
68
    self.instances = []
69
    self.to_rem = []
70
    self.opts = None
71
    self.ParseOptions()
72
    self.GetState()
73

    
74
  def ClearFeedbackBuf(self):
75
    """Clear the feedback buffer."""
76
    self._feed_buf.truncate(0)
77

    
78
  def GetFeedbackBuf(self):
79
    """Return the contents of the buffer."""
80
    return self._feed_buf.getvalue()
81

    
82
  def Feedback(self, msg):
83
    """Acumulate feedback in our buffer."""
84
    self._feed_buf.write(msg)
85
    self._feed_buf.write("\n")
86

    
87
  def ExecOp(self, op):
88
    """Execute an opcode and manage the exec buffer."""
89
    self.ClearFeedbackBuf()
90
    return self.proc.ExecOpCode(op)
91

    
92
  def ParseOptions(self):
93
    """Parses the command line options.
94

    
95
    In case of command line errors, it will show the usage and exit the
96
    program.
97

    
98
    """
99

    
100
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
101
                                   version="%%prog (ganeti) %s" %
102
                                   constants.RELEASE_VERSION,
103
                                   option_class=cli.CliOption)
104

    
105
    parser.add_option("-o", "--os", dest="os", default=None,
106
                      help="OS to use during burnin",
107
                      metavar="<OS>")
108
    parser.add_option("--os-size", dest="os_size", help="Disk size",
109
                      default=4 * 1024, type="unit", metavar="<size>")
110
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
111
                      default=4 * 1024, type="unit", metavar="<size>")
112
    parser.add_option("-v", "--verbose",
113
                      action="store_true", dest="verbose", default=False,
114
                      help="print command execution messages to stdout")
115
    parser.add_option("--no-replace1", dest="do_replace1",
116
                      help="Skip disk replacement with the same secondary",
117
                      action="store_false", default=True)
118
    parser.add_option("--no-replace2", dest="do_replace2",
119
                      help="Skip disk replacement with a different secondary",
120
                      action="store_false", default=True)
121
    parser.add_option("--no-failover", dest="do_failover",
122
                      help="Skip instance failovers", action="store_false",
123
                      default=True)
124
    parser.add_option("-t", "--disk-template", dest="disk_template",
125
                      choices=("remote_raid1", "drbd"),
126
                      default="remote_raid1",
127
                      help="Template type for network mirroring (remote_raid1"
128
                      " or drbd) [remote_raid1]")
129
    parser.add_option("-n", "--nodes", dest="nodes", default="",
130
                      help="Comma separated list of nodes to perform"
131
                      " the burnin on (defaults to all nodes)")
132

    
133
    options, args = parser.parse_args()
134
    if len(args) < 1 or options.os is None:
135
      Usage()
136

    
137
    supported_disk_templates = (constants.DT_PLAIN, constants.DT_REMOTE_RAID1,
138
                                constants.DT_DRBD8)
139
    if options.disk_template not in supported_disk_templates:
140
      Log("Unknown disk template '%s'" % options.disk_template)
141
      sys.exit(1)
142

    
143
    self.opts = options
144
    self.instances = args
145

    
146
  def GetState(self):
147
    """Read the cluster state from the config."""
148
    if self.opts.nodes:
149
      names = self.opts.nodes.split(",")
150
    else:
151
      names = []
152
    try:
153
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
154
      result = self.ExecOp(op)
155
    except errors.GenericError, err:
156
      err_code, msg = cli.FormatError(err)
157
      Log(msg)
158
      sys.exit(err_code)
159
    self.nodes = [data[0] for data in result]
160

    
161
    result = self.ExecOp(opcodes.OpDiagnoseOS())
162

    
163
    if not result:
164
      Log("Can't get the OS list")
165
      sys.exit(1)
166

    
167
    # filter non-valid OS-es
168
    oses = {}
169
    for node_name in result:
170
      oses[node_name] = [obj for obj in result[node_name] if obj]
171

    
172
    fnode = oses.keys()[0]
173
    os_set = set([os_inst.name for os_inst in oses[fnode]])
174
    del oses[fnode]
175
    for node in oses:
176
      os_set &= set([os_inst.name for os_inst in oses[node]])
177

    
178
    if self.opts.os not in os_set:
179
      Log("OS '%s' not found" % self.opts.os)
180
      sys.exit(1)
181

    
182
  def CreateInstances(self):
183
    """Create the given instances.
184

    
185
    """
186
    self.to_rem = []
187
    mytor = izip(cycle(self.nodes),
188
                 islice(cycle(self.nodes), 1, None),
189
                 self.instances)
190
    for pnode, snode, instance in mytor:
191
      op = opcodes.OpCreateInstance(instance_name=instance,
192
                                    mem_size=128,
193
                                    disk_size=self.opts.os_size,
194
                                    swap_size=self.opts.swap_size,
195
                                    disk_template=self.opts.disk_template,
196
                                    mode=constants.INSTANCE_CREATE,
197
                                    os_type=self.opts.os,
198
                                    pnode=pnode,
199
                                    snode=snode,
200
                                    vcpus=1,
201
                                    start=True,
202
                                    ip_check=True,
203
                                    wait_for_sync=True)
204
      Log("- Add instance %s on node %s" % (instance, pnode))
205
      self.ExecOp(op)
206
      self.to_rem.append(instance)
207

    
208
  def ReplaceDisks1R1(self):
209
    """Replace disks with the same secondary for rr1."""
210
    # replace all, both disks
211
    for instance in self.instances:
212
      op = opcodes.OpReplaceDisks(instance_name=instance,
213
                                  remote_node=None,
214
                                  mode=constants.REPLACE_DISK_ALL,
215
                                  disks=["sda", "sdb"])
216

    
217
      Log("- Replace disks for instance %s" % (instance))
218
      self.ExecOp(op)
219

    
220
  def ReplaceDisks1D8(self):
221
    """Replace disks on primary and secondary for drbd8."""
222
    for instance in self.instances:
223
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
224
        op = opcodes.OpReplaceDisks(instance_name=instance,
225
                                    mode=mode,
226
                                    disks=["sda", "sdb"])
227
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
228
        self.ExecOp(op)
229

    
230
  def ReplaceDisks2(self):
231
    """Replace secondary node."""
232
    if self.opts.disk_template == constants.DT_REMOTE_RAID1:
233
      mode = constants.REPLACE_DISK_ALL
234
    else:
235
      mode = constants.REPLACE_DISK_SEC
236

    
237
    mytor = izip(islice(cycle(self.nodes), 2, None),
238
                 self.instances)
239
    for tnode, instance in mytor:
240
      op = opcodes.OpReplaceDisks(instance_name=instance,
241
                                  mode=mode,
242
                                  remote_node=tnode,
243
                                  disks=["sda", "sdb"])
244
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
245
      self.ExecOp(op)
246

    
247
  def Failover(self):
248
    """Failover the instances."""
249

    
250
    for instance in self.instances:
251
      op = opcodes.OpFailoverInstance(instance_name=instance,
252
                                      ignore_consistency=False)
253

    
254
      Log("- Failover instance %s" % (instance))
255
      self.ExecOp(op)
256

    
257
  def StopStart(self):
258
    """Stop/start the instances."""
259
    for instance in self.instances:
260
      op = opcodes.OpShutdownInstance(instance_name=instance)
261
      Log("- Shutdown instance %s" % instance)
262
      self.ExecOp(op)
263
      op = opcodes.OpStartupInstance(instance_name=instance, force=False)
264
      Log("- Start instance %s" % instance)
265
      self.ExecOp(op)
266

    
267
  def Remove(self):
268
    """Remove the instances."""
269
    for instance in self.to_rem:
270
      op = opcodes.OpRemoveInstance(instance_name=instance)
271
      Log("- Remove instance %s" % instance)
272
      self.ExecOp(op)
273

    
274
  def BurninCluster(self):
275
    """Test a cluster intensively.
276

    
277
    This will create instances and then start/stop/failover them.
278
    It is safe for existing instances but could impact performance.
279

    
280
    """
281

    
282
    opts = self.opts
283

    
284
    Log("- Testing global parameters")
285

    
286
    if len(self.nodes) == 1 and opts.disk_template != constants.DT_PLAIN:
287
      Log("When one node is available/selected the disk template must"
288
               " be 'plain'")
289
      sys.exit(1)
290

    
291
    has_err = True
292
    try:
293
      self.CreateInstances()
294
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
295
        if opts.disk_template == constants.DT_REMOTE_RAID1:
296
          self.ReplaceDisks1R1()
297
        elif opts.disk_template == constants.DT_DRBD8:
298
          self.ReplaceDisks1D8()
299
      if (opts.do_replace2 and len(self.nodes) > 2 and
300
          opts.disk_template in constants.DTS_NET_MIRROR) :
301
        self.ReplaceDisks2()
302

    
303
      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
304
        self.Failover()
305

    
306
      self.StopStart()
307
      has_err = False
308
    finally:
309
      if has_err:
310
        Log("Error detected: opcode buffer follows:\n\n")
311
        Log(self.GetFeedbackBuf())
312
        Log("\n\n")
313
      self.Remove()
314

    
315
    return 0
316

    
317

    
318
def main():
319
  """Main function"""
320

    
321
  burner = Burner()
322
  try:
323
    utils.Lock('cmd', max_retries=15, debug=True)
324
  except errors.LockError, err:
325
    logger.ToStderr(str(err))
326
    return 1
327
  try:
328
    retval = burner.BurninCluster()
329
  finally:
330
    utils.Unlock('cmd')
331
    utils.LockCleanup()
332
  return retval
333

    
334

    
335
if __name__ == "__main__":
  # main() returns the exit status (e.g. 1 when the lock cannot be
  # taken); hand it to the shell instead of always exiting with 0
  sys.exit(main())