(2.10) Export NIC's UUID and name to network scripts
[ganeti-local] / lib / hypervisor / hv_xen.py
index 7b07e98..04a6f52 100644 (file)
@@ -24,7 +24,9 @@
 """
 
 import logging
+import errno
 import string # pylint: disable=W0402
+import shutil
 from cStringIO import StringIO
 
 from ganeti import constants
@@ -47,6 +49,7 @@ _DISK_LETTERS = string.ascii_lowercase
 _FILE_DRIVER_MAP = {
   constants.FD_LOOP: "file",
   constants.FD_BLKTAP: "tap:aio",
+  constants.FD_BLKTAP2: "tap2:tapdisk:aio",
   }
 
 
@@ -165,6 +168,15 @@ def _GetXmList(fn, include_node, _timeout=5):
   return _ParseXmList(lines, include_node)
 
 
+def _IsInstanceRunning(instance_info):
+  return instance_info == "r-----" \
+      or instance_info == "-b----"
+
+
+def _IsInstanceShutdown(instance_info):
+  return instance_info == "---s--"
+
+
 def _ParseNodeInfo(info):
   """Return information about the node.
 
@@ -310,6 +322,9 @@ class XenHypervisor(hv_base.BaseHypervisor):
   CAN_MIGRATE = True
   REBOOT_RETRY_COUNT = 60
   REBOOT_RETRY_INTERVAL = 10
+  _ROOT_DIR = pathutils.RUN_DIR + "/xen-hypervisor"
+  _NICS_DIR = _ROOT_DIR + "/nic" # contains NICs' info
+  _DIRS = [_ROOT_DIR, _NICS_DIR]
 
   ANCILLARY_FILES = [
     XEND_CONFIG_FILE,
@@ -320,7 +335,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
     XL_CONFIG_FILE,
     ]
 
-  def __init__(self, _cfgdir=None):
+  def __init__(self, _cfgdir=None, _run_cmd_fn=None, _cmd=None):
     hv_base.BaseHypervisor.__init__(self)
 
     if _cfgdir is None:
@@ -328,6 +343,47 @@ class XenHypervisor(hv_base.BaseHypervisor):
     else:
       self._cfgdir = _cfgdir
 
+    if _run_cmd_fn is None:
+      self._run_cmd_fn = utils.RunCmd
+    else:
+      self._run_cmd_fn = _run_cmd_fn
+
+    self._cmd = _cmd
+
+  def _GetCommand(self):
+    """Returns Xen command to use.
+
+    """
+    if self._cmd is None:
+      # TODO: Make command a hypervisor parameter
+      cmd = constants.XEN_CMD
+    else:
+      cmd = self._cmd
+
+    if cmd not in constants.KNOWN_XEN_COMMANDS:
+      raise errors.ProgrammerError("Unknown Xen command '%s'" % cmd)
+
+    return cmd
+
+  def _RunXen(self, args, timeout=None):
+    """Wrapper around L{utils.process.RunCmd} to run Xen command.
+
+    If a timeout (in seconds) is specified, the command will be terminated after
+    that number of seconds.
+
+    @see: L{utils.process.RunCmd}
+
+    """
+    cmd = []
+
+    if timeout is not None:
+      cmd.extend(["timeout", str(timeout)])
+
+    cmd.extend([self._GetCommand()])
+    cmd.extend(args)
+
+    return self._run_cmd_fn(cmd)
+
   def _ConfigFileName(self, instance_name):
     """Get the config file name for an instance.
 
@@ -340,6 +396,57 @@ class XenHypervisor(hv_base.BaseHypervisor):
     return utils.PathJoin(self._cfgdir, instance_name)
 
   @classmethod
+  def _WriteNICInfoFile(cls, instance, idx, nic):
+    """Write the NIC information file for one NIC of the instance.
+
+    The file holds KEY=value lines (tags, MAC, index, UUID, mode, link, ...).
+
+    """
+    instance_name = instance.name
+    dirs = [(dname, constants.RUN_DIRS_MODE)
+            for dname in cls._DIRS + [cls._InstanceNICDir(instance_name)]]
+    utils.EnsureDirs(dirs)
+
+    cfg_file = cls._InstanceNICFile(instance_name, idx)
+    data = StringIO()
+
+    data.write("TAGS=%s\n" % "\ ".join(instance.GetTags()))
+    if nic.netinfo:
+      netinfo = objects.Network.FromDict(nic.netinfo)
+      for k, v in netinfo.HooksDict().iteritems():
+        data.write("%s=%s\n" % (k, v))
+
+    data.write("MAC=%s\n" % nic.mac)
+    if nic.ip:
+      data.write("IP=%s\n" % nic.ip)
+    data.write("INTERFACE_INDEX=%s\n" % str(idx))
+    if nic.name:
+      data.write("INTERFACE_NAME=%s\n" % nic.name)
+    data.write("INTERFACE_UUID=%s\n" % nic.uuid)
+    data.write("MODE=%s\n" % nic.nicparams[constants.NIC_MODE])
+    data.write("LINK=%s\n" % nic.nicparams[constants.NIC_LINK])
+
+    try:
+      utils.WriteFile(cfg_file, data=data.getvalue())
+    except EnvironmentError, err:
+      raise errors.HypervisorError("Cannot write Xen instance configuration"
+                                   " file %s: %s" % (cfg_file, err))
+
+  @classmethod
+  def _InstanceNICDir(cls, instance_name):
+    """Returns the directory holding the NIC info files for a given instance.
+
+    """
+    return utils.PathJoin(cls._NICS_DIR, instance_name)
+
+  @classmethod
+  def _InstanceNICFile(cls, instance_name, seq):
+    """Returns the name of the file containing the info for a given NIC.
+
+    """
+    return utils.PathJoin(cls._InstanceNICDir(instance_name), str(seq))
+
+  @classmethod
   def _GetConfig(cls, instance, startup_memory, block_devices):
     """Build Xen configuration for an instance.
 
@@ -380,15 +487,28 @@ class XenHypervisor(hv_base.BaseHypervisor):
 
     """
     utils.RemoveFile(self._ConfigFileName(instance_name))
+    try:
+      shutil.rmtree(self._InstanceNICDir(instance_name))
+    except OSError, err:
+      if err.errno != errno.ENOENT:
+        raise
 
-  @staticmethod
-  def _GetXmList(include_node):
+  def _StashConfigFile(self, instance_name):
+    """Move the Xen config file to the log directory and return its new path.
+
+    """
+    old_filename = self._ConfigFileName(instance_name)
+    base = ("%s-%s" %
+            (instance_name, utils.TimestampForFilename()))
+    new_filename = utils.PathJoin(pathutils.LOG_XEN_DIR, base)
+    utils.RenameFile(old_filename, new_filename)
+    return new_filename
+
+  def _GetXmList(self, include_node):
     """Wrapper around module level L{_GetXmList}.
 
     """
-    # TODO: Abstract running Xen command for testing
-    return _GetXmList(lambda: utils.RunCmd([constants.XEN_CMD, "list"]),
-                      include_node)
+    return _GetXmList(lambda: self._RunXen(["list"]), include_node)
 
   def ListInstances(self):
     """Get the list of running instances.
@@ -445,34 +565,100 @@ class XenHypervisor(hv_base.BaseHypervisor):
 
     self._MakeConfigFile(instance, startup_memory, block_devices)
 
-    cmd = [constants.XEN_CMD, "create"]
+    cmd = ["create"]
     if startup_paused:
-      cmd.extend(["-p"])
-    cmd.extend([self._ConfigFileName(instance.name)])
-    result = utils.RunCmd(cmd)
+      cmd.append("-p")
+    cmd.append(self._ConfigFileName(instance.name))
 
+    result = self._RunXen(cmd)
     if result.failed:
-      raise errors.HypervisorError("Failed to start instance %s: %s (%s)" %
+      # Move the Xen configuration file to the log directory to avoid
+      # leaving a stale config file behind.
+      stashed_config = self._StashConfigFile(instance.name)
+      raise errors.HypervisorError("Failed to start instance %s: %s (%s). Moved"
+                                   " config file to %s" %
                                    (instance.name, result.fail_reason,
-                                    result.output))
+                                    result.output, stashed_config))
 
-  def StopInstance(self, instance, force=False, retry=False, name=None):
+  def StopInstance(self, instance, force=False, retry=False, name=None,
+                   timeout=None):
     """Stop an instance.
 
+    A soft shutdown can be interrupted. A hard shutdown tries forever.
+
     """
+    assert(timeout is None or force is not None)
+
     if name is None:
       name = instance.name
-    self._RemoveConfigFile(name)
+
+    return self._StopInstance(name, force, timeout)
+
+  def _ShutdownInstance(self, name, timeout):
+    """Shutdown an instance if the instance is running.
+
+    The '-w' flag waits for shutdown to complete which avoids the need
+    to poll in the case where we want to destroy the domain
+    immediately after shutdown.
+
+    @type name: string
+    @param name: name of the instance to stop
+    @type timeout: int or None
+    @param timeout: a timeout after which the shutdown command should be killed,
+                    or None for no timeout
+
+    """
+    instance_info = self.GetInstanceInfo(name)
+
+    if instance_info is None or _IsInstanceShutdown(instance_info[4]):
+      logging.info("Failed to shutdown instance %s, not running", name)
+      return None
+
+    return self._RunXen(["shutdown", "-w", name], timeout)
+
+  def _DestroyInstance(self, name):
+    """Destroy an instance if the instance exists.
+
+    @type name: string
+    @param name: name of the instance to destroy
+
+    """
+    instance_info = self.GetInstanceInfo(name)
+
+    if instance_info is None:
+      logging.info("Failed to destroy instance %s, does not exist", name)
+      return None
+
+    return self._RunXen(["destroy", name])
+
+  def _StopInstance(self, name, force, timeout):
+    """Stop an instance.
+
+    @type name: string
+    @param name: name of the instance to stop
+
+    @type force: boolean
+    @param force: whether to do a "hard" stop (destroy)
+
+    @type timeout: int or None
+    @param timeout: a timeout after which the shutdown command should be killed,
+                    or None for no timeout
+
+    """
     if force:
-      command = [constants.XEN_CMD, "destroy", name]
+      result = self._DestroyInstance(name)
     else:
-      command = [constants.XEN_CMD, "shutdown", name]
-    result = utils.RunCmd(command)
+      self._ShutdownInstance(name, timeout)
+      result = self._DestroyInstance(name)
 
-    if result.failed:
+    if result is not None and result.failed and \
+          self.GetInstanceInfo(name) is not None:
       raise errors.HypervisorError("Failed to stop instance %s: %s, %s" %
                                    (name, result.fail_reason, result.output))
 
+    # Remove configuration file if stopping the instance was successful
+    self._RemoveConfigFile(name)
+
   def RebootInstance(self, instance):
     """Reboot an instance.
 
@@ -483,7 +669,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
       raise errors.HypervisorError("Failed to reboot instance %s,"
                                    " not running" % instance.name)
 
-    result = utils.RunCmd([constants.XEN_CMD, "reboot", instance.name])
+    result = self._RunXen(["reboot", instance.name])
     if result.failed:
       raise errors.HypervisorError("Failed to reboot instance %s: %s, %s" %
                                    (instance.name, result.fail_reason,
@@ -516,14 +702,16 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @param mem: actual memory size to use for instance runtime
 
     """
-    cmd = [constants.XEN_CMD, "mem-set", instance.name, mem]
-    result = utils.RunCmd(cmd)
+    result = self._RunXen(["mem-set", instance.name, mem])
     if result.failed:
       raise errors.HypervisorError("Failed to balloon instance %s: %s (%s)" %
                                    (instance.name, result.fail_reason,
                                     result.output))
+
+    # Update configuration file
     cmd = ["sed", "-ie", "s/^memory.*$/memory = %s/" % mem]
     cmd.append(self._ConfigFileName(instance.name))
+
     result = utils.RunCmd(cmd)
     if result.failed:
       raise errors.HypervisorError("Failed to update memory for %s: %s (%s)" %
@@ -536,8 +724,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @see: L{_GetNodeInfo} and L{_ParseNodeInfo}
 
     """
-    # TODO: Abstract running Xen command for testing
-    result = utils.RunCmd([constants.XEN_CMD, "info"])
+    result = self._RunXen(["info"])
     if result.failed:
       logging.error("Can't run 'xm info' (%s): %s", result.fail_reason,
                     result.output)
@@ -565,7 +752,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @return: Problem description if something is wrong, C{None} otherwise
 
     """
-    result = utils.RunCmd([constants.XEN_CMD, "info"])
+    result = self._RunXen(["info"])
     if result.failed:
       return "'xm info' failed: %s, %s" % (result.fail_reason, result.output)
 
@@ -626,34 +813,53 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @param live: perform a live migration
 
     """
-    if self.GetInstanceInfo(instance.name) is None:
+    port = instance.hvparams[constants.HV_MIGRATION_PORT]
+
+    # TODO: Pass cluster name via RPC
+    cluster_name = ssconf.SimpleStore().GetClusterName()
+
+    return self._MigrateInstance(cluster_name, instance.name, target, port,
+                                 live)
+
+  def _MigrateInstance(self, cluster_name, instance_name, target, port, live,
+                       _ping_fn=netutils.TcpPing):
+    """Migrate an instance to a target node.
+
+    @see: L{MigrateInstance} for details
+
+    """
+    if self.GetInstanceInfo(instance_name) is None:
       raise errors.HypervisorError("Instance not running, cannot migrate")
 
-    port = instance.hvparams[constants.HV_MIGRATION_PORT]
+    cmd = self._GetCommand()
 
-    if (constants.XEN_CMD == constants.XEN_CMD_XM and
-        not netutils.TcpPing(target, port, live_port_needed=True)):
+    if (cmd == constants.XEN_CMD_XM and
+        not _ping_fn(target, port, live_port_needed=True)):
       raise errors.HypervisorError("Remote host %s not listening on port"
                                    " %s, cannot migrate" % (target, port))
 
-    args = [constants.XEN_CMD, "migrate"]
-    if constants.XEN_CMD == constants.XEN_CMD_XM:
+    args = ["migrate"]
+
+    if cmd == constants.XEN_CMD_XM:
       args.extend(["-p", "%d" % port])
       if live:
         args.append("-l")
-    elif constants.XEN_CMD == constants.XEN_CMD_XL:
-      cluster_name = ssconf.SimpleStore().GetClusterName()
-      args.extend(["-s", constants.XL_SSH_CMD % cluster_name])
-      args.extend(["-C", self._ConfigFileName(instance.name)])
+
+    elif cmd == constants.XEN_CMD_XL:
+      args.extend([
+        "-s", constants.XL_SSH_CMD % cluster_name,
+        "-C", self._ConfigFileName(instance_name),
+        ])
+
     else:
-      raise errors.HypervisorError("Unsupported xen command: %s" %
-                                   constants.XEN_CMD)
+      raise errors.HypervisorError("Unsupported Xen command: %s" % self._cmd)
+
+    args.extend([instance_name, target])
 
-    args.extend([instance.name, target])
-    result = utils.RunCmd(args)
+    result = self._RunXen(args)
     if result.failed:
       raise errors.HypervisorError("Failed to migrate instance %s: %s" %
-                                   (instance.name, result.output))
+                                   (instance_name, result.output))
 
   def FinalizeMigrationSource(self, instance, success, live):
     """Finalize the instance migration on the source node.
@@ -730,6 +936,7 @@ class XenPvmHypervisor(XenHypervisor):
     constants.HV_CPU_CAP: hv_base.OPT_NONNEGATIVE_INT_CHECK,
     constants.HV_CPU_WEIGHT:
       (False, lambda x: 0 < x < 65536, "invalid weight", None, None),
+    constants.HV_VIF_SCRIPT: hv_base.OPT_FILE_CHECK,
     }
 
   def _GetConfig(self, instance, startup_memory, block_devices):
@@ -781,14 +988,17 @@ class XenPvmHypervisor(XenHypervisor):
     config.write("name = '%s'\n" % instance.name)
 
     vif_data = []
-    for nic in instance.nics:
+    for idx, nic in enumerate(instance.nics):
       nic_str = "mac=%s" % (nic.mac)
       ip = getattr(nic, "ip", None)
       if ip is not None:
         nic_str += ", ip=%s" % ip
       if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
         nic_str += ", bridge=%s" % nic.nicparams[constants.NIC_LINK]
+      if hvp[constants.HV_VIF_SCRIPT]:
+        nic_str += ", script=%s" % hvp[constants.HV_VIF_SCRIPT]
       vif_data.append("'%s'" % nic_str)
+      self._WriteNICInfoFile(instance, idx, nic)
 
     disk_data = \
       _GetConfigFileDiskData(block_devices, hvp[constants.HV_BLOCKDEV_PREFIX])
@@ -850,6 +1060,10 @@ class XenHvmHypervisor(XenHypervisor):
     constants.HV_CPU_CAP: hv_base.NO_CHECK,
     constants.HV_CPU_WEIGHT:
       (False, lambda x: 0 < x < 65535, "invalid weight", None, None),
+    constants.HV_VIF_TYPE:
+      hv_base.ParamInSet(False, constants.HT_HVM_VALID_VIF_TYPES),
+    constants.HV_VIRIDIAN: hv_base.NO_CHECK,
+    constants.HV_VIF_SCRIPT: hv_base.OPT_FILE_CHECK,
     }
 
   def _GetConfig(self, instance, startup_memory, block_devices):
@@ -887,6 +1101,11 @@ class XenHvmHypervisor(XenHypervisor):
       config.write("acpi = 1\n")
     else:
       config.write("acpi = 0\n")
+    if hvp[constants.HV_VIRIDIAN]:
+      config.write("viridian = 1\n")
+    else:
+      config.write("viridian = 0\n")
+
     config.write("apic = 1\n")
     config.write("device_model = '%s'\n" % hvp[constants.HV_DEVICE_MODEL])
     config.write("boot = '%s'\n" % hvp[constants.HV_BOOT_ORDER])
@@ -921,22 +1140,34 @@ class XenHvmHypervisor(XenHypervisor):
       config.write("localtime = 1\n")
 
     vif_data = []
+    # Note: what is called 'nic_type' here, is used as value for the xen nic
+    # vif config parameter 'model'. For the xen nic vif parameter 'type', we use
+    # the 'vif_type' to avoid a clash of notation.
     nic_type = hvp[constants.HV_NIC_TYPE]
+
     if nic_type is None:
+      vif_type_str = ""
+      if hvp[constants.HV_VIF_TYPE]:
+        vif_type_str = ", type=%s" % hvp[constants.HV_VIF_TYPE]
       # ensure old instances don't change
-      nic_type_str = ", type=ioemu"
+      nic_type_str = vif_type_str
     elif nic_type == constants.HT_NIC_PARAVIRTUAL:
       nic_type_str = ", type=paravirtualized"
     else:
-      nic_type_str = ", model=%s, type=ioemu" % nic_type
-    for nic in instance.nics:
+      # parameter 'model' is only valid with type 'ioemu'
+      nic_type_str = ", model=%s, type=%s" % \
+        (nic_type, constants.HT_HVM_VIF_IOEMU)
+    for idx, nic in enumerate(instance.nics):
       nic_str = "mac=%s%s" % (nic.mac, nic_type_str)
       ip = getattr(nic, "ip", None)
       if ip is not None:
         nic_str += ", ip=%s" % ip
       if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
         nic_str += ", bridge=%s" % nic.nicparams[constants.NIC_LINK]
+      if hvp[constants.HV_VIF_SCRIPT]:
+        nic_str += ", script=%s" % hvp[constants.HV_VIF_SCRIPT]
       vif_data.append("'%s'" % nic_str)
+      self._WriteNICInfoFile(instance, idx, nic)
 
     config.write("vif = [%s]\n" % ",".join(vif_data))