Revision 175f44c2 tools/burnin

b/tools/burnin
1 1
#!/usr/bin/python
2 2
#
3 3

  
4
"""Burnin program"""
5

  
4 6
import sys
5 7
import optparse
8
from itertools import izip, islice, cycle
6 9

  
7 10
from ganeti import opcodes
8 11
from ganeti import mcpu
9
from ganeti import objects
10 12
from ganeti import constants
11 13
from ganeti import cli
12 14
from ganeti import logger
......
22 24
  print >> sys.stderr, USAGE
23 25
  sys.exit(2)
24 26

  
25

  
26 27
def Feedback(msg):
  """Write a feedback message to standard output.

  Args:
    msg: the object to print; a newline is appended by the print statement

  """
  print >> sys.stdout, msg
31 32

  
32

  
33
def ParseOptions():
  """Parses the command line options.

  In case of command line errors, it will show the usage and exit the
  program.

  The --no-replace1, --no-replace2 and --no-failover switches are
  store_false flags: the matching do_* attribute defaults to True and
  passing the switch turns that burnin step off.

  Returns:
    (options, args), as returned by OptionParser.parse_args

  """
  parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                 version="%%prog (ganeti) %s" %
                                 constants.RELEASE_VERSION,
                                 option_class=cli.CliOption)

  parser.add_option("-o", "--os", dest="os", default=None,
                    help="OS to use during burnin",
                    metavar="<OS>")
  parser.add_option("--os-size", dest="os_size", help="Disk size",
                    default=4 * 1024, type="unit", metavar="<size>")
  parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                    default=4 * 1024, type="unit", metavar="<size>")
  parser.add_option("-v", "--verbose",
                    action="store_true", dest="verbose", default=False,
                    help="print command execution messages to stdout")
  # The help texts below describe what passing the switch does (it
  # disables the step), not what the default behaviour is.
  parser.add_option("--no-replace1", dest="do_replace1",
                    help="Skip disk replacement with the same secondary",
                    action="store_false", default=True)
  parser.add_option("--no-replace2", dest="do_replace2",
                    help="Skip disk replacement with a different secondary",
                    action="store_false", default=True)
  parser.add_option("--no-failover", dest="do_failover",
                    help="Skip instance failovers", action="store_false",
                    default=True)
  parser.add_option("-t", "--disk-template", dest="disk_template",
                    choices=("remote_raid1", "drbd8"), default="remote_raid1",
                    help="Template type for network mirroring (remote_raid1"
                    " or drbd8) [remote_raid1]")
  parser.add_option("-n", "--nodes", dest="nodes", default="",
                    help="Comma separated list of nodes to perform the burnin"
                    " on (defaults to all nodes)")

  options, args = parser.parse_args()
  # At least one positional argument and the OS name are mandatory.
  if len(args) < 1 or options.os is None:
    Usage()

  return options, args
80

  
81

  
82
def BurninCluster(opts, args):
83
  """Test a cluster intensively.
84

  
85
  This will create instances and then start/stop/failover them.
86
  It is safe for existing instances but could impact performance.
87

  
88
  """
89

  
90
  logger.SetupLogging(debug=True, program="ganeti/burnin")
91
  proc = mcpu.Processor(feedback=Feedback)
92
  if opts.nodes:
93
    names = opts.nodes.split(",")
94
  else:
95
    names = []
96
  try:
97
    result = proc.ExecOpCode(opcodes.OpQueryNodes(output_fields=["name"],
98
                                                  names=names))
99
  except errors.GenericError, err:
100
    err_code, msg = cli.FormatError(err)
101
    Feedback(msg)
102
    return err_code
103
  nodelist = [data[0] for data in result]
104

  
105
  Feedback("- Testing global parameters")
106

  
107
  result = proc.ExecOpCode(opcodes.OpDiagnoseOS())
108

  
109
  if not result:
110
    Feedback("Can't get the OS list")
111
    return 1
112

  
113
  # filter non-valid OS-es
114
  oses = {}
115
  for node_name in result:
116
    oses[node_name] = [obj for obj in result[node_name]
117
                       if isinstance(obj, objects.OS)]
118

  
119
  fnode = oses.keys()[0]
120
  os_set = set([os_inst.name for os_inst in oses[fnode]])
121
  del oses[fnode]
122
  for node in oses:
123
    os_set &= set([os_inst.name for os_inst in oses[node]])
124

  
125
  if opts.os not in os_set:
126
    Feedback("OS '%s' not found" % opts.os)
127
    return 1
128

  
129
  to_remove = []
130
  if opts.disk_template == "remote_raid1":
131
    disk_template = constants.DT_REMOTE_RAID1
132
  elif opts.disk_template == "drbd8":
133
    disk_template = constants.DT_DRBD8
134
  else:
135
    Feedback("Unknown disk template '%s'" % opts.disk_template)
136
    return 1
137
  try:
138
    idx = 0
139
    for instance_name in args:
140
      next_idx = idx + 1
141
      if next_idx >= len(nodelist):
142
        next_idx = 0
143
      pnode = nodelist[idx]
144
      snode = nodelist[next_idx]
145
      if len(nodelist) > 1:
146
        tplate = disk_template
147
      else:
148
        tplate = constants.DT_PLAIN
149

  
150
      op = opcodes.OpCreateInstance(instance_name=instance_name, mem_size=128,
151
                                    disk_size=opts.os_size,
152
                                    swap_size=opts.swap_size,
153
                                    disk_template=tplate,
33
class Burner(object):
34
  """Burner class."""
35

  
36
  def __init__(self):
37
    """Constructor."""
38
    logger.SetupLogging(debug=True, program="ganeti/burnin")
39
    self.proc = mcpu.Processor(feedback=Feedback)
40
    self.nodes = []
41
    self.instances = []
42
    self.to_rem = []
43
    self.opts = None
44
    self.ParseOptions()
45
    self.GetState()
46

  
47
  def ParseOptions(self):
48
    """Parses the command line options.
49

  
50
    In case of command line errors, it will show the usage and exit the
51
    program.
52

  
53
    """
54

  
55
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
56
                                   version="%%prog (ganeti) %s" %
57
                                   constants.RELEASE_VERSION,
58
                                   option_class=cli.CliOption)
59

  
60
    parser.add_option("-o", "--os", dest="os", default=None,
61
                      help="OS to use during burnin",
62
                      metavar="<OS>")
63
    parser.add_option("--os-size", dest="os_size", help="Disk size",
64
                      default=4 * 1024, type="unit", metavar="<size>")
65
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
66
                      default=4 * 1024, type="unit", metavar="<size>")
67
    parser.add_option("-v", "--verbose",
68
                      action="store_true", dest="verbose", default=False,
69
                      help="print command execution messages to stdout")
70
    parser.add_option("--no-replace1", dest="do_replace1",
71
                      help="Skip disk replacement with the same secondary",
72
                      action="store_false", default=True)
73
    parser.add_option("--no-replace2", dest="do_replace2",
74
                      help="Skip disk replacement with a different secondary",
75
                      action="store_false", default=True)
76
    parser.add_option("--no-failover", dest="do_failover",
77
                      help="Skip instance failovers", action="store_false",
78
                      default=True)
79
    parser.add_option("-t", "--disk-template", dest="disk_template",
80
                      choices=("remote_raid1", "drbd8"),
81
                      default="remote_raid1",
82
                      help="Template type for network mirroring (remote_raid1"
83
                      " or drbd8) [remote_raid1]")
84
    parser.add_option("-n", "--nodes", dest="nodes", default="",
85
                      help="Comma separated list of nodes to perform"
86
                      " the burnin on (defaults to all nodes)")
87

  
88
    options, args = parser.parse_args()
89
    if len(args) < 1 or options.os is None:
90
      Usage()
91

  
92
    if options.disk_template == "plain":
93
      disk_template = constants.DT_PLAIN
94
    elif options.disk_template == "remote_raid1":
95
      disk_template = constants.DT_REMOTE_RAID1
96
    elif options.disk_template == "drbd8":
97
      disk_template = constants.DT_DRBD8
98
    else:
99
      Feedback("Unknown disk template '%s'" % options.disk_template)
100
      sys.exit(1)
101

  
102
    options.disk_template = disk_template
103
    self.opts = options
104
    self.instances = args
105

  
106
  def GetState(self):
107
    """Read the cluster state from the config."""
108
    if self.opts.nodes:
109
      names = self.opts.nodes.split(",")
110
    else:
111
      names = []
112
    try:
113
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
114
      result = self.proc.ExecOpCode(op)
115
    except errors.GenericError, err:
116
      err_code, msg = cli.FormatError(err)
117
      Feedback(msg)
118
      sys.exit(err_code)
119
    self.nodes = [data[0] for data in result]
120

  
121
    result = self.proc.ExecOpCode(opcodes.OpDiagnoseOS())
122

  
123
    if not result:
124
      Feedback("Can't get the OS list")
125
      sys.exit(1)
126

  
127
    # filter non-valid OS-es
128
    oses = {}
129
    for node_name in result:
130
      oses[node_name] = [obj for obj in result[node_name] if obj]
131

  
132
    fnode = oses.keys()[0]
133
    os_set = set([os_inst.name for os_inst in oses[fnode]])
134
    del oses[fnode]
135
    for node in oses:
136
      os_set &= set([os_inst.name for os_inst in oses[node]])
137

  
138
    if self.opts.os not in os_set:
139
      Feedback("OS '%s' not found" % self.opts.os)
140
      sys.exit(1)
141

  
142
  def CreateInstances(self):
143
    """Create the given instances.
144

  
145
    """
146
    self.to_rem = []
147
    mytor = izip(cycle(self.nodes),
148
                 islice(cycle(self.nodes), 1, None),
149
                 self.instances)
150
    for pnode, snode, instance in mytor:
151
      op = opcodes.OpCreateInstance(instance_name=instance,
152
                                    mem_size=128,
153
                                    disk_size=self.opts.os_size,
154
                                    swap_size=self.opts.swap_size,
155
                                    disk_template=self.opts.disk_template,
154 156
                                    mode=constants.INSTANCE_CREATE,
155
                                    os_type=opts.os, pnode=pnode,
156
                                    snode=snode, vcpus=1,
157
                                    os_type=self.opts.os,
158
                                    pnode=pnode,
159
                                    snode=snode,
160
                                    vcpus=1,
157 161
                                    start=True,
158 162
                                    ip_check=True,
159 163
                                    wait_for_sync=True)
160
      Feedback("- Add instance %s on node %s" % (instance_name, pnode))
161
      result = proc.ExecOpCode(op)
162
      to_remove.append(instance_name)
163
      idx = next_idx
164

  
165

  
166
    if opts.do_replace1:
167
      if len(nodelist) > 1:
168
        # failover
169
        for instance_name in args:
170
          op = opcodes.OpReplaceDisks(instance_name=instance_name,
171
                                      remote_node=None,
172
                                      mode=constants.REPLACE_DISK_ALL,
173
                                      disks=["sda", "sdb"])
174

  
175
          Feedback("- Replace disks for instance %s" % (instance_name))
176
          result = proc.ExecOpCode(op)
177
      else:
178
        Feedback("- Can't run replace1, not enough nodes")
179

  
180
    if opts.do_failover:
181
      if len(nodelist) > 1:
182
        # failover
183
        for instance_name in args:
184
          op = opcodes.OpFailoverInstance(instance_name=instance_name,
185
                                          ignore_consistency=True)
186

  
187
          Feedback("- Failover instance %s" % (instance_name))
188
          result = proc.ExecOpCode(op)
189
      else:
190
        Feedback("- Can't run failovers, not enough nodes")
191

  
192
    # stop / start
193
    for instance_name in args:
194
      op = opcodes.OpShutdownInstance(instance_name=instance_name)
195
      Feedback("- Shutdown instance %s" % instance_name)
196
      result = proc.ExecOpCode(op)
197
      op = opcodes.OpStartupInstance(instance_name=instance_name, force=False)
198
      Feedback("- Start instance %s" % instance_name)
199
      result = proc.ExecOpCode(op)
200

  
201
  finally:
202
    # remove
203
    for instance_name in to_remove:
204
      op = opcodes.OpRemoveInstance(instance_name=instance_name)
205
      Feedback("- Remove instance %s" % instance_name)
206
      result = proc.ExecOpCode(op)
207

  
208
  return 0
164
      Feedback("- Add instance %s on node %s" % (instance, pnode))
165
      self.proc.ExecOpCode(op)
166
      self.to_rem.append(instance)
167

  
168
  def ReplaceDisks1R1(self):
169
    """Replace disks with the same secondary for rr1."""
170
    # replace all, both disks
171
    for instance in self.instances:
172
      op = opcodes.OpReplaceDisks(instance_name=instance,
173
                                  remote_node=None,
174
                                  mode=constants.REPLACE_DISK_ALL,
175
                                  disks=["sda", "sdb"])
176

  
177
      Feedback("- Replace disks for instance %s" % (instance))
178
      self.proc.ExecOpCode(op)
179

  
180
  def ReplaceDisks1D8(self):
181
    """Replace disks on primary and secondary for drbd8."""
182
    for instance in self.instances:
183
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
184
        op = opcodes.OpReplaceDisks(instance_name=instance,
185
                                    mode=mode,
186
                                    disks=["sda", "sdb"])
187
        Feedback("- Replace disks (%s) for instance %s" % (mode, instance))
188
        self.proc.ExecOpCode(op)
189

  
190
  def ReplaceDisks2(self):
191
    """Replace secondary node."""
192
    if self.opts.disk_template == constants.DT_REMOTE_RAID1:
193
      mode = constants.REPLACE_DISK_ALL
194
    else:
195
      mode = constants.REPLACE_DISK_SEC
196

  
197
    mytor = izip(islice(cycle(self.nodes), 2, None),
198
                 self.instances)
199
    for tnode, instance in mytor:
200
      op = opcodes.OpReplaceDisks(instance_name=instance,
201
                                  mode=mode,
202
                                  remote_node=tnode,
203
                                  disks=["sda", "sdb"])
204
      Feedback("- Replace secondary (%s) for instance %s" % (mode, instance))
205
      self.proc.ExecOpCode(op)
206

  
207
  def Failover(self):
208
    """Failover the instances."""
209

  
210
    for instance in self.instances:
211
      op = opcodes.OpFailoverInstance(instance_name=instance,
212
                                      ignore_consistency=False)
213

  
214
      Feedback("- Failover instance %s" % (instance))
215
      self.proc.ExecOpCode(op)
216

  
217
  def StopStart(self):
218
    """Stop/start the instances."""
219
    for instance in self.instances:
220
      op = opcodes.OpShutdownInstance(instance_name=instance)
221
      Feedback("- Shutdown instance %s" % instance)
222
      self.proc.ExecOpCode(op)
223
      op = opcodes.OpStartupInstance(instance_name=instance, force=False)
224
      Feedback("- Start instance %s" % instance)
225
      self.proc.ExecOpCode(op)
226

  
227
  def Remove(self):
228
    """Remove the instances."""
229
    for instance in self.to_rem:
230
      op = opcodes.OpRemoveInstance(instance_name=instance)
231
      Feedback("- Remove instance %s" % instance)
232
      self.proc.ExecOpCode(op)
233

  
234
  def BurninCluster(self):
235
    """Test a cluster intensively.
236

  
237
    This will create instances and then start/stop/failover them.
238
    It is safe for existing instances but could impact performance.
239

  
240
    """
241

  
242
    opts = self.opts
243

  
244
    Feedback("- Testing global parameters")
245

  
246
    if len(self.nodes) == 1 and opts.disk_template != constants.DT_PLAIN:
247
      Feedback("When one node is available/selected the disk template must"
248
               " be 'plain'")
249
      sys.exit(1)
250

  
251
    try:
252
      self.CreateInstances()
253
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
254
        if opts.disk_template == constants.DT_REMOTE_RAID1:
255
          self.ReplaceDisks1R1()
256
        elif opts.disk_template == constants.DT_DRBD8:
257
          self.ReplaceDisks1D8()
258
      if (opts.do_replace2 and len(self.nodes) > 2 and
259
          opts.disk_template in constants.DTS_NET_MIRROR) :
260
        self.ReplaceDisks2()
261

  
262
      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
263
        self.Failover()
264

  
265
      self.StopStart()
266

  
267
    finally:
268
      self.Remove()
269

  
270
    return 0
209 271

  
210 272
def main():
211 273
  """Main function"""
212 274

  
213
  opts, args = ParseOptions()
275
  burner = Burner()
214 276
  try:
215 277
    utils.Lock('cmd', max_retries=15, debug=True)
216 278
  except errors.LockError, err:
217 279
    logger.ToStderr(str(err))
218 280
    return 1
219 281
  try:
220
    retval = BurninCluster(opts, args)
282
    retval = burner.BurninCluster()
221 283
  finally:
222 284
    utils.Unlock('cmd')
223 285
    utils.LockCleanup()

Also available in: Unified diff