backend: rename AttachOrAssemble to Assemble
diff --git a/tools/burnin b/tools/burnin
index 1881d10..c316ca0 100755
--- a/tools/burnin
+++ b/tools/burnin
@@ -27,6 +27,9 @@ import os
 import sys
 import optparse
 import time
+import socket
+import urllib2
+import errno
 from itertools import izip, islice, cycle
 from cStringIO import StringIO
 
@@ -41,6 +44,10 @@ from ganeti import utils
 USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
 
 
+class InstanceDown(Exception):
+  """The checked instance was not up"""
+
+
 def Usage():
   """Shows program usage information and exits the program."""
 
@@ -49,13 +56,26 @@ def Usage():
   sys.exit(2)
 
 
-def Log(msg):
+def Log(msg, indent=0):
   """Simple function that prints out its argument.
 
   """
-  print msg
+  headers = {
+    0: "- ",
+    1: "* ",
+    2: ""
+    }
+  sys.stdout.write("%*s%s%s\n" % (2*indent, "",
+                                   headers.get(indent, "  "), msg))
   sys.stdout.flush()
 
+def Err(msg, exit_code=1):
+  """Simple error logging that prints to stderr.
+
+  """
+  sys.stderr.write(msg + "\n")
+  sys.stderr.flush()
+  sys.exit(exit_code)
 
 class Burner(object):
   """Burner class."""
@@ -85,7 +105,7 @@ class Burner(object):
     self._feed_buf.write("%s %s\n" % (time.ctime(utils.MergeTime(msg[0])),
                                       msg[2]))
     if self.opts.verbose:
-      Log(msg)
+      Log(msg, indent=3)
 
   def ExecOp(self, op):
     """Execute an opcode and manage the exec buffer."""
@@ -102,10 +122,10 @@ class Burner(object):
     """
     self.ClearFeedbackBuf()
     job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
-    Log("- Submitted job IDs %s" % ", ".join(job_ids))
+    Log("Submitted job IDs %s" % ", ".join(job_ids), indent=1)
     results = []
     for jid in job_ids:
-      Log("- Waiting for job %s" % jid)
+      Log("Waiting for job %s" % jid, indent=2)
       results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
 
     return results
@@ -151,6 +171,24 @@ class Burner(object):
     parser.add_option("--no-startstop", dest="do_startstop",
                       help="Skip instance stop/start", action="store_false",
                       default=True)
+    parser.add_option("--no-reinstall", dest="do_reinstall",
+                      help="Skip instance reinstall", action="store_false",
+                      default=True)
+    parser.add_option("--no-reboot", dest="do_reboot",
+                      help="Skip instance reboot", action="store_false",
+                      default=True)
+    parser.add_option("--no-activate-disks", dest="do_activate_disks",
+                      help="Skip disk activation/deactivation",
+                      action="store_false", default=True)
+    parser.add_option("--no-add-disks", dest="do_addremove_disks",
+                      help="Skip disk addition/removal",
+                      action="store_false", default=True)
+    parser.add_option("--no-add-nics", dest="do_addremove_nics",
+                      help="Skip NIC addition/removal",
+                      action="store_false", default=True)
+    parser.add_option("--no-nics", dest="nics",
+                      help="No network interfaces", action="store_const",
+                      const=[], default=[{}])
     parser.add_option("--rename", dest="rename", default=None,
                       help="Give one unused instance name which is taken"
                            " to start the renaming sequence",
@@ -172,6 +210,16 @@ class Burner(object):
                       dest="parallel",
                       help="Enable parallelization of some operations in"
                       " order to speed burnin or to test granular locking")
+    parser.add_option("--net-timeout", default=15, type="int",
+                      dest="net_timeout",
+                      help="The instance check network timeout in seconds"
+                      " (defaults to 15 seconds)")
+    parser.add_option("-C", "--http-check", default=False, action="store_true",
+                      dest="http_check",
+                      help="Enable checking of instance status via http,"
+                      " looking for /hostname.txt that should contain the"
+                      " name of the instance")
+
 
     options, args = parser.parse_args()
     if len(args) < 1 or options.os is None:
@@ -182,26 +230,27 @@ class Burner(object):
                                 constants.DT_PLAIN,
                                 constants.DT_DRBD8)
     if options.disk_template not in supported_disk_templates:
-      Log("Unknown disk template '%s'" % options.disk_template)
-      sys.exit(1)
-
-    disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
-    disk_growth = [utils.ParseUnit(v) for v in options.disk_growth.split(",")]
-    if len(disk_growth) != len(disk_size):
-      Log("Wrong disk sizes/growth combination")
-      sys.exit(1)
+      Err("Unknown disk template '%s'" % options.disk_template)
+
+    if options.disk_template == constants.DT_DISKLESS:
+      disk_size = disk_growth = []
+      options.do_addremove_disks = False
+    else:
+      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
+      disk_growth = [utils.ParseUnit(v)
+                     for v in options.disk_growth.split(",")]
+      if len(disk_growth) != len(disk_size):
+        Err("Wrong disk sizes/growth combination")
     if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
         (not disk_size and options.disk_template != constants.DT_DISKLESS)):
-      Log("Wrong disk count/disk template combination")
-      sys.exit(1)
+      Err("Wrong disk count/disk template combination")
 
     self.disk_size = disk_size
     self.disk_growth = disk_growth
     self.disk_count = len(disk_size)
 
     if options.nodes and options.iallocator:
-      Log("Give either the nodes option or the iallocator option, not both")
-      sys.exit(1)
+      Err("Give either the nodes option or the iallocator option, not both")
 
     self.opts = options
     self.instances = args
@@ -211,6 +260,8 @@ class Burner(object):
       }
     self.hvp = {}
 
+    socket.setdefaulttimeout(options.net_timeout)
+
   def GetState(self):
     """Read the cluster state from the config."""
     if self.opts.nodes:
@@ -218,27 +269,24 @@ class Burner(object):
     else:
       names = []
     try:
-      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
+      op = opcodes.OpQueryNodes(output_fields=["name", "offline"], names=names)
       result = self.ExecOp(op)
     except errors.GenericError, err:
       err_code, msg = cli.FormatError(err)
-      Log(msg)
-      sys.exit(err_code)
-    self.nodes = [data[0] for data in result]
+      Err(msg, exit_code=err_code)
+    self.nodes = [data[0] for data in result if not data[1]]
 
     result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
                                               names=[]))
 
     if not result:
-      Log("Can't get the OS list")
-      sys.exit(1)
+      Err("Can't get the OS list")
 
     # filter non-valid OS-es
     os_set = [val[0] for val in result if val[1]]
 
     if self.opts.os not in os_set:
-      Log("OS '%s' not found" % self.opts.os)
-      sys.exit(1)
+      Err("OS '%s' not found" % self.opts.os)
 
   def CreateInstances(self):
     """Create the given instances.
@@ -250,22 +298,25 @@ class Burner(object):
                  self.instances)
     jobset = []
 
+    Log("Creating instances")
     for pnode, snode, instance in mytor:
+      Log("instance %s" % instance, indent=1)
       if self.opts.iallocator:
         pnode = snode = None
-        Log("- Add instance %s (iallocator: %s)" %
-              (instance, self.opts.iallocator))
+        msg = "with iallocator %s" % self.opts.iallocator
       elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
         snode = None
-        Log("- Add instance %s on node %s" % (instance, pnode))
+        msg = "on %s" % pnode
       else:
-        Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))
+        msg = "on %s, %s" % (pnode, snode)
+
+      Log(msg, indent=2)
 
       op = opcodes.OpCreateInstance(instance_name=instance,
                                     disks = [ {"size": size}
                                               for size in self.disk_size],
                                     disk_template=self.opts.disk_template,
-                                    nics=[{}],
+                                    nics=self.opts.nics,
                                     mode=constants.INSTANCE_CREATE,
                                     os_type=self.opts.os,
                                     pnode=pnode,
@@ -291,76 +342,91 @@ class Burner(object):
     if self.opts.parallel:
       self.ExecJobSet(jobset)
 
+    for instance in self.instances:
+      self._CheckInstanceAlive(instance)
+
   def GrowDisks(self):
     """Grow both the os and the swap disks by the requested amount, if any."""
+    Log("Growing disks")
     for instance in self.instances:
+      Log("instance %s" % instance, indent=1)
       for idx, growth in enumerate(self.disk_growth):
         if growth > 0:
           op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                   amount=growth, wait_for_sync=True)
-          Log("- Increase %s's disk/%s by %s MB" % (instance, idx, growth))
+          Log("increase disk/%s by %s MB" % (idx, growth), indent=2)
           self.ExecOp(op)
 
   def ReplaceDisks1D8(self):
     """Replace disks on primary and secondary for drbd8."""
+    Log("Replacing disks on the same nodes")
     for instance in self.instances:
+      Log("instance %s" % instance, indent=1)
       for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
         op = opcodes.OpReplaceDisks(instance_name=instance,
                                     mode=mode,
                                     disks=[i for i in range(self.disk_count)])
-        Log("- Replace disks (%s) for instance %s" % (mode, instance))
+        Log("run %s" % mode, indent=2)
         self.ExecOp(op)
 
   def ReplaceDisks2(self):
     """Replace secondary node."""
-    mode = constants.REPLACE_DISK_SEC
+    Log("Changing the secondary node")
+    mode = constants.REPLACE_DISK_CHG
 
     mytor = izip(islice(cycle(self.nodes), 2, None),
                  self.instances)
     for tnode, instance in mytor:
+      Log("instance %s" % instance, indent=1)
       if self.opts.iallocator:
         tnode = None
+        msg = "with iallocator %s" % self.opts.iallocator
+      else:
+        msg = tnode
       op = opcodes.OpReplaceDisks(instance_name=instance,
                                   mode=mode,
                                   remote_node=tnode,
                                   iallocator=self.opts.iallocator,
                                   disks=[i for i in range(self.disk_count)])
-      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
+      Log("run %s %s" % (mode, msg), indent=2)
       self.ExecOp(op)
 
   def Failover(self):
     """Failover the instances."""
-
+    Log("Failing over instances")
     for instance in self.instances:
+      Log("instance %s" % instance, indent=1)
       op = opcodes.OpFailoverInstance(instance_name=instance,
                                       ignore_consistency=False)
 
-      Log("- Failover instance %s" % (instance))
       self.ExecOp(op)
+    for instance in self.instances:
+      self._CheckInstanceAlive(instance)
 
   def ImportExport(self):
     """Export the instance, delete it, and import it back.
 
     """
-
+    Log("Exporting and re-importing instances")
     mytor = izip(cycle(self.nodes),
                  islice(cycle(self.nodes), 1, None),
                  islice(cycle(self.nodes), 2, None),
                  self.instances)
 
     for pnode, snode, enode, instance in mytor:
-
+      Log("instance %s" % instance, indent=1)
       if self.opts.iallocator:
         pnode = snode = None
-        import_log_msg = ("- Import instance %s from node %s (iallocator: %s)" %
-                          (instance, enode, self.opts.iallocator))
+        import_log_msg = ("import from %s"
+                          " with iallocator %s" %
+                          (enode, self.opts.iallocator))
       elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
         snode = None
-        import_log_msg = ("- Import instance %s from node %s to node %s" %
-                          (instance, enode, pnode))
+        import_log_msg = ("import from %s to %s" %
+                          (enode, pnode))
       else:
-        import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
-                          (instance, enode, pnode, snode))
+        import_log_msg = ("import from %s to %s, %s" %
+                          (enode, pnode, snode))
 
       exp_op = opcodes.OpExportInstance(instance_name=instance,
                                            target_node=enode,
@@ -375,7 +441,7 @@ class Burner(object):
                                         disks = [ {"size": size}
                                                   for size in self.disk_size],
                                         disk_template=self.opts.disk_template,
-                                        nics=[{}],
+                                        nics=self.opts.nics,
                                         mode=constants.INSTANCE_IMPORT,
                                         src_node=enode,
                                         src_path=imp_dir,
@@ -393,63 +459,185 @@ class Burner(object):
 
       erem_op = opcodes.OpRemoveExport(instance_name=instance)
 
-      Log("- Export instance %s to node %s" % (instance, enode))
+      Log("export to node %s" % enode, indent=2)
       self.ExecOp(exp_op)
-      Log("- Remove instance %s" % (instance))
+      Log("remove instance", indent=2)
       self.ExecOp(rem_op)
       self.to_rem.remove(instance)
-      Log(import_log_msg)
+      Log(import_log_msg, indent=2)
       self.ExecOp(imp_op)
-      Log("- Remove export of instance %s" % (instance))
+      Log("remove export", indent=2)
       self.ExecOp(erem_op)
 
       self.to_rem.append(instance)
 
+    for instance in self.instances:
+      self._CheckInstanceAlive(instance)
+
   def StopInstance(self, instance):
     """Stop given instance."""
     op = opcodes.OpShutdownInstance(instance_name=instance)
-    Log("- Shutdown instance %s" % instance)
+    Log("shutdown", indent=2)
     self.ExecOp(op)
 
   def StartInstance(self, instance):
     """Start given instance."""
     op = opcodes.OpStartupInstance(instance_name=instance, force=False)
-    Log("- Start instance %s" % instance)
+    Log("startup", indent=2)
     self.ExecOp(op)
 
   def RenameInstance(self, instance, instance_new):
     """Rename instance."""
     op = opcodes.OpRenameInstance(instance_name=instance,
                                   new_name=instance_new)
-    Log("- Rename instance %s to %s" % (instance, instance_new))
+    Log("rename to %s" % instance_new, indent=2)
     self.ExecOp(op)
 
   def StopStart(self):
     """Stop/start the instances."""
+    Log("Stopping and starting instances")
     for instance in self.instances:
+      Log("instance %s" % instance, indent=1)
       self.StopInstance(instance)
       self.StartInstance(instance)
 
+    for instance in self.instances:
+      self._CheckInstanceAlive(instance)
+
   def Remove(self):
     """Remove the instances."""
+    Log("Removing instances")
     for instance in self.to_rem:
+      Log("instance %s" % instance, indent=1)
       op = opcodes.OpRemoveInstance(instance_name=instance,
                                     ignore_failures=True)
-      Log("- Remove instance %s" % instance)
       self.ExecOp(op)
 
-
   def Rename(self):
     """Rename the instances."""
+    Log("Renaming instances")
     rename = self.opts.rename
     for instance in self.instances:
+      Log("instance %s" % instance, indent=1)
       self.StopInstance(instance)
       self.RenameInstance(instance, rename)
       self.StartInstance(rename)
+      self._CheckInstanceAlive(rename)
       self.StopInstance(rename)
       self.RenameInstance(rename, instance)
       self.StartInstance(instance)
 
+    for instance in self.instances:
+      self._CheckInstanceAlive(instance)
+
+  def Reinstall(self):
+    """Reinstall the instances."""
+    Log("Reinstalling instances")
+    for instance in self.instances:
+      Log("instance %s" % instance, indent=1)
+      self.StopInstance(instance)
+      op = opcodes.OpReinstallInstance(instance_name=instance)
+      Log("reinstall without passing the OS", indent=2)
+      self.ExecOp(op)
+      op = opcodes.OpReinstallInstance(instance_name=instance,
+                                       os_type=self.opts.os)
+      Log("reinstall specifying the OS", indent=2)
+      self.ExecOp(op)
+      self.StartInstance(instance)
+    for instance in self.instances:
+      self._CheckInstanceAlive(instance)
+
+  def Reboot(self):
+    """Reboot the instances."""
+    Log("Rebooting instances")
+    for instance in self.instances:
+      Log("instance %s" % instance, indent=1)
+      for reboot_type in constants.REBOOT_TYPES:
+        op = opcodes.OpRebootInstance(instance_name=instance,
+                                      reboot_type=reboot_type,
+                                      ignore_secondaries=False)
+        Log("reboot with type '%s'" % reboot_type, indent=2)
+        self.ExecOp(op)
+        self._CheckInstanceAlive(instance)
+
+  def ActivateDisks(self):
+    """Activate and deactivate disks of the instances."""
+    Log("Activating/deactivating disks")
+    for instance in self.instances:
+      Log("instance %s" % instance, indent=1)
+      op_act = opcodes.OpActivateInstanceDisks(instance_name=instance)
+      op_deact = opcodes.OpDeactivateInstanceDisks(instance_name=instance)
+      Log("activate disks when online", indent=2)
+      self.ExecOp(op_act)
+      self.StopInstance(instance)
+      Log("activate disks when offline", indent=2)
+      self.ExecOp(op_act)
+      Log("deactivate disks (when offline)", indent=2)
+      self.ExecOp(op_deact)
+      self.StartInstance(instance)
+    for instance in self.instances:
+      self._CheckInstanceAlive(instance)
+
+  def AddRemoveDisks(self):
+    """Add and remove an extra disk for the instances."""
+    Log("Adding and removing disks")
+    for instance in self.instances:
+      Log("instance %s" % instance, indent=1)
+      op_add = opcodes.OpSetInstanceParams(\
+        instance_name=instance,
+        disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
+      op_rem = opcodes.OpSetInstanceParams(\
+        instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
+      Log("adding a disk", indent=2)
+      self.ExecOp(op_add)
+      self.StopInstance(instance)
+      Log("removing last disk", indent=2)
+      self.ExecOp(op_rem)
+      self.StartInstance(instance)
+    for instance in self.instances:
+      self._CheckInstanceAlive(instance)
+
+  def AddRemoveNICs(self):
+    """Add and remove an extra NIC for the instances."""
+    Log("Adding and removing NICs")
+    for instance in self.instances:
+      Log("instance %s" % instance, indent=1)
+      op_add = opcodes.OpSetInstanceParams(\
+        instance_name=instance, nics=[(constants.DDM_ADD, {})])
+      op_rem = opcodes.OpSetInstanceParams(\
+        instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
+      Log("adding a NIC", indent=2)
+      self.ExecOp(op_add)
+      Log("removing last NIC", indent=2)
+      self.ExecOp(op_rem)
+
+  def _CheckInstanceAlive(self, instance):
+    """Check if an instance is alive by doing http checks.
+
+    This will try to retrieve the URL /hostname.txt on the instance
+    and check that it contains the instance's hostname. In case we
+    get ECONNREFUSED, we retry for up to net_timeout seconds; on any
+    other error we abort.
+
+    """
+    if not self.opts.http_check:
+      return
+    url = None
+    for retries in range(self.opts.net_timeout):
+      try:
+        url = urllib2.urlopen("http://%s/hostname.txt" % instance)
+        break
+      except urllib2.URLError, err:
+        if err.args[0][0] != errno.ECONNREFUSED:
+          raise InstanceDown(instance, str(err))
+        time.sleep(1)
+    if url is None:
+      raise InstanceDown(instance, "Connection refused, check timed out")
+    hostname = url.read().strip()
+    if hostname != instance:
+      raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
+                                    (instance, hostname)))
+
   def BurninCluster(self):
     """Test a cluster intensively.
 
@@ -460,14 +648,13 @@ class Burner(object):
 
     opts = self.opts
 
-    Log("- Testing global parameters")
+    Log("Testing global parameters")
 
     if (len(self.nodes) == 1 and
         opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                    constants.DT_FILE)):
-      Log("When one node is available/selected the disk template must"
+      Err("When one node is available/selected the disk template must"
           " be 'diskless', 'file' or 'plain'")
-      sys.exit(1)
 
     has_err = True
     try:
@@ -484,15 +671,32 @@ class Burner(object):
       if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
         self.Failover()
 
-      if opts.do_importexport:
+      if (opts.do_importexport and
+          opts.disk_template not in (constants.DT_DISKLESS,
+                                     constants.DT_FILE)):
         self.ImportExport()
 
-      if opts.do_startstop:
-        self.StopStart()
+      if opts.do_reinstall:
+        self.Reinstall()
+
+      if opts.do_reboot:
+        self.Reboot()
+
+      if opts.do_addremove_disks:
+        self.AddRemoveDisks()
+
+      if opts.do_addremove_nics:
+        self.AddRemoveNICs()
+
+      if opts.do_activate_disks:
+        self.ActivateDisks()
 
       if opts.rename:
         self.Rename()
 
+      if opts.do_startstop:
+        self.StopStart()
+
       has_err = False
     finally:
       if has_err: