Statistics
| Branch: | Tag: | Revision:

root / tools / burnin @ 12c3449a

History | View | Annotate | Download (10.7 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Burnin program
23

    
24
"""
25

    
26
import sys
27
import optparse
28
from itertools import izip, islice, cycle
29
from cStringIO import StringIO
30

    
31
from ganeti import opcodes
32
from ganeti import mcpu
33
from ganeti import constants
34
from ganeti import cli
35
from ganeti import logger
36
from ganeti import errors
37
from ganeti import utils
38

    
39

    
40
# One-line synopsis printed by Usage() and passed to the option parser.
USAGE = "\tburnin -o OS_NAME [options...] instance_name ..."
41

    
42

    
43
def Usage():
  """Print the usage summary on standard error and terminate.

  The process exits with status 2.

  """
  for line in ("Usage:", USAGE):
    print >> sys.stderr, line
  sys.exit(2)
49

    
50

    
51
def Log(msg):
  """Print msg on standard output.

  """
  print >> sys.stdout, msg
56

    
57

    
58
class Burner(object):
59
  """Burner class."""
60

    
61
  def __init__(self):
    """Set up logging and the opcode processor, then load options and state.

    The command line is parsed and the cluster is queried as part of
    construction, so creating a Burner may terminate the program.

    """
    logger.SetupLogging(debug=False, program="ganeti/burnin")
    # Buffer that Feedback() appends to.
    self._feed_buf = StringIO()
    self.proc = mcpu.Processor(feedback=self.Feedback)
    # Placeholders; ParseOptions() fills in opts and instances, GetState()
    # fills in nodes.
    self.opts = None
    self.instances = []
    self.nodes = []
    self.to_rem = []
    self.ParseOptions()
    self.GetState()
72

    
73
  def ClearFeedbackBuf(self):
74
    """Clear the feedback buffer."""
75
    self._feed_buf.truncate(0)
76

    
77
  def GetFeedbackBuf(self):
78
    """Return the contents of the buffer."""
79
    return self._feed_buf.getvalue()
80

    
81
  def Feedback(self, msg):
82
    """Acumulate feedback in our buffer."""
83
    self._feed_buf.write(msg)
84
    self._feed_buf.write("\n")
85

    
86
  def ExecOp(self, op):
87
    """Execute an opcode and manage the exec buffer."""
88
    self.ClearFeedbackBuf()
89
    return self.proc.ExecOpCode(op)
90

    
91
  def ParseOptions(self):
    """Parse the command line and store the result on the instance.

    On success self.opts holds the parsed options and self.instances the
    positional arguments (the instance names).

    If no instance name or no OS is given, the usage is shown and the
    program exits.  An unsupported disk template also ends the program.

    """
    # The templates burnin accepts.  The same tuple feeds the choices of the
    # -t option, so every supported template -- including the plain one that
    # BurninCluster() requires when a single node is selected -- can actually
    # be chosen on the command line.
    supported_disk_templates = (constants.DT_PLAIN, constants.DT_REMOTE_RAID1,
                                constants.DT_DRBD8)

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--os-size", dest="os_size", help="Disk size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    # NOTE(review): the default used to be the literal "remote_raid1"; this
    # assumes constants.DT_REMOTE_RAID1 has that value -- confirm.
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=supported_disk_templates,
                      default=constants.DT_REMOTE_RAID1,
                      help="Disk template to use (%s) [%s]" %
                      (", ".join(supported_disk_templates),
                       constants.DT_REMOTE_RAID1))
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")

    options, args = parser.parse_args()
    if not args or options.os is None:
      Usage()

    # Safety net: the parser already restricts -t to the supported values.
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    self.opts = options
    self.instances = args
144

    
145
  def GetState(self):
146
    """Read the cluster state from the config."""
147
    if self.opts.nodes:
148
      names = self.opts.nodes.split(",")
149
    else:
150
      names = []
151
    try:
152
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
153
      result = self.ExecOp(op)
154
    except errors.GenericError, err:
155
      err_code, msg = cli.FormatError(err)
156
      Log(msg)
157
      sys.exit(err_code)
158
    self.nodes = [data[0] for data in result]
159

    
160
    result = self.ExecOp(opcodes.OpDiagnoseOS())
161

    
162
    if not result:
163
      Log("Can't get the OS list")
164
      sys.exit(1)
165

    
166
    # filter non-valid OS-es
167
    oses = {}
168
    for node_name in result:
169
      oses[node_name] = [obj for obj in result[node_name] if obj]
170

    
171
    fnode = oses.keys()[0]
172
    os_set = set([os_inst.name for os_inst in oses[fnode]])
173
    del oses[fnode]
174
    for node in oses:
175
      os_set &= set([os_inst.name for os_inst in oses[node]])
176

    
177
    if self.opts.os not in os_set:
178
      Log("OS '%s' not found" % self.opts.os)
179
      sys.exit(1)
180

    
181
  def CreateInstances(self):
    """Create every requested instance, spreading them over the nodes.

    The node list is walked cyclically: each instance gets the next node as
    its primary and the node after that as its secondary.  Every instance
    whose creation opcode completed is recorded in self.to_rem.

    """
    self.to_rem = []
    primaries = cycle(self.nodes)
    # The same rotation shifted by one position.
    secondaries = islice(cycle(self.nodes), 1, None)
    for pnode, snode, instance in izip(primaries, secondaries,
                                       self.instances):
      op = opcodes.OpCreateInstance(instance_name=instance,
                                    os_type=self.opts.os,
                                    mode=constants.INSTANCE_CREATE,
                                    disk_template=self.opts.disk_template,
                                    disk_size=self.opts.os_size,
                                    swap_size=self.opts.swap_size,
                                    mem_size=128,
                                    vcpus=1,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True)
      Log("- Add instance %s on node %s" % (instance, pnode))
      self.ExecOp(op)
      self.to_rem.append(instance)
206

    
207
  def ReplaceDisks1R1(self):
    """Replace both disks of every instance without naming a new node (rr1).

    """
    disk_names = ("sda", "sdb")
    for instance in self.instances:
      # A fresh list per opcode, as no list is shared between opcodes.
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=constants.REPLACE_DISK_ALL,
                                  remote_node=None,
                                  disks=list(disk_names))
      Log("- Replace disks for instance %s" % (instance))
      self.ExecOp(op)
218

    
219
  def ReplaceDisks1D8(self):
    """Replace the disks of every instance, secondary side first (drbd8).

    For each instance one replace opcode is run with the secondary mode and
    then one with the primary mode.

    """
    modes = (constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI)
    for instance in self.instances:
      for mode in modes:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    disks=["sda", "sdb"],
                                    mode=mode)
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)
228

    
229
  def ReplaceDisks2(self):
    """Replace the disks of every instance onto a different remote node.

    The target nodes are taken from the cyclic node list starting at its
    third entry.  The replace mode is "all" for the remote_raid1 template
    and "secondary" for any other template.

    """
    if self.opts.disk_template != constants.DT_REMOTE_RAID1:
      mode = constants.REPLACE_DISK_SEC
    else:
      mode = constants.REPLACE_DISK_ALL

    targets = islice(cycle(self.nodes), 2, None)
    for tnode, instance in izip(targets, self.instances):
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  remote_node=tnode,
                                  mode=mode,
                                  disks=["sda", "sdb"])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)
245

    
246
  def Failover(self):
    """Run a failover opcode, with consistency checks on, per instance."""
    for name in self.instances:
      op = opcodes.OpFailoverInstance(ignore_consistency=False,
                                      instance_name=name)
      Log("- Failover instance %s" % (name))
      self.ExecOp(op)
255

    
256
  def StopStart(self):
    """Shut down and then start again each instance, one at a time."""
    # (log format, opcode class, extra opcode arguments), in execution order.
    steps = (
      ("- Shutdown instance %s", opcodes.OpShutdownInstance, {}),
      ("- Start instance %s", opcodes.OpStartupInstance, {"force": False}),
      )
    for instance in self.instances:
      for text, op_class, extra in steps:
        op = op_class(instance_name=instance, **extra)
        Log(text % instance)
        self.ExecOp(op)
265

    
266
  def Remove(self):
    """Remove every instance recorded in self.to_rem."""
    for name in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=name)
      Log("- Remove instance %s" % name)
      self.ExecOp(op)
272

    
273
  def BurninCluster(self):
    """Test a cluster intensively.

    Creates the instances, optionally replaces their disks and fails them
    over (network-mirrored templates only), and finally stops and starts
    them.  The instances recorded as created are removed at the end, also
    when a step fails; in that case the opcode feedback buffer is printed
    first and the error propagates.

    It is safe for existing instances but could impact performance.

    Returns 0 on success.

    """
    opts = self.opts
    template = opts.disk_template

    Log("- Testing global parameters")

    if len(self.nodes) == 1 and template != constants.DT_PLAIN:
      Log("When one node is available/selected the disk template must"
          " be 'plain'")
      sys.exit(1)

    # Flag cleared only once every step has completed; lets the finally
    # clause tell a failure from a normal exit.
    has_err = True
    try:
      self.CreateInstances()

      if template in constants.DTS_NET_MIRROR:
        if opts.do_replace1:
          if template == constants.DT_REMOTE_RAID1:
            self.ReplaceDisks1R1()
          elif template == constants.DT_DRBD8:
            self.ReplaceDisks1D8()
        # Moving to a different secondary needs a third node.
        if opts.do_replace2 and len(self.nodes) > 2:
          self.ReplaceDisks2()
        if opts.do_failover:
          self.Failover()

      self.StopStart()
      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0
315

    
316

    
317
def main():
318
  """Main function"""
319

    
320
  burner = Burner()
321
  try:
322
    utils.Lock('cmd', max_retries=15, debug=True)
323
  except errors.LockError, err:
324
    logger.ToStderr(str(err))
325
    return 1
326
  try:
327
    retval = burner.BurninCluster()
328
  finally:
329
    utils.Unlock('cmd')
330
    utils.LockCleanup()
331
  return retval
332

    
333

    
334
if __name__ == "__main__":
  # Use main()'s return value as the process exit status; otherwise a
  # failure result such as 1 (lock not acquired) would be reported as 0.
  sys.exit(main())