cmdlib: Fix typo, s/nick/NIC/
[ganeti-local] / lib / hypervisor / hv_xen.py
index f954e23..8d36e20 100644 (file)
@@ -1,7 +1,7 @@
 #
 #
 
-# Copyright (C) 2006, 2007, 2008 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -23,9 +23,6 @@
 
 """
 
-import os
-import os.path
-import time
 import logging
 from cStringIO import StringIO
 
@@ -33,6 +30,8 @@ from ganeti import constants
 from ganeti import errors
 from ganeti import utils
 from ganeti.hypervisor import hv_base
+from ganeti import netutils
+from ganeti import objects
 
 
 class XenHypervisor(hv_base.BaseHypervisor):
@@ -42,6 +41,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
   all the functionality that is identical for both.
 
   """
+  CAN_MIGRATE = True
   REBOOT_RETRY_COUNT = 60
   REBOOT_RETRY_INTERVAL = 10
 
@@ -85,7 +85,22 @@ class XenHypervisor(hv_base.BaseHypervisor):
     utils.RemoveFile("/etc/xen/%s" % instance_name)
 
   @staticmethod
-  def _GetXMList(include_node):
+  def _RunXmList(xmlist_errors):
+    """Helper function for L{_GetXMList} to run "xm list".
+
+    """
+    result = utils.RunCmd(["xm", "list"])
+    if result.failed:
+      logging.error("xm list failed (%s): %s", result.fail_reason,
+                    result.output)
+      xmlist_errors.append(result)
+      raise utils.RetryAgain()
+
+    # skip over the heading
+    return result.stdout.splitlines()[1:]
+
+  @classmethod
+  def _GetXMList(cls, include_node):
     """Return the list of running instances.
 
     If the include_node argument is True, then we return information
@@ -94,21 +109,20 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @return: list of (name, id, memory, vcpus, state, time spent)
 
     """
-    for _ in range(5):
-      result = utils.RunCmd(["xm", "list"])
-      if not result.failed:
-        break
-      logging.error("xm list failed (%s): %s", result.fail_reason,
-                    result.output)
-      time.sleep(1)
+    xmlist_errors = []
+    try:
+      lines = utils.Retry(cls._RunXmList, 1, 5, args=(xmlist_errors, ))
+    except utils.RetryTimeout:
+      if xmlist_errors:
+        xmlist_result = xmlist_errors.pop()
 
-    if result.failed:
-      raise errors.HypervisorError("xm list failed, retries"
-                                   " exceeded (%s): %s" %
-                                   (result.fail_reason, result.output))
+        errmsg = ("xm list failed, timeout exceeded (%s): %s" %
+                  (xmlist_result.fail_reason, xmlist_result.output))
+      else:
+        errmsg = "xm list failed"
+
+      raise errors.HypervisorError(errmsg)
 
-    # skip over the heading
-    lines = result.stdout.splitlines()[1:]
     result = []
     for line in lines:
       # The format of lines is:
@@ -123,7 +137,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
         data[2] = int(data[2])
         data[3] = int(data[3])
         data[5] = float(data[5])
-      except ValueError, err:
+      except (TypeError, ValueError), err:
         raise errors.HypervisorError("Can't parse output of xm list,"
                                      " line: %s, error: %s" % (line, err))
 
@@ -178,46 +192,53 @@ class XenHypervisor(hv_base.BaseHypervisor):
                                    (instance.name, result.fail_reason,
                                     result.output))
 
-  def StopInstance(self, instance, force=False, retry=False):
+  def StopInstance(self, instance, force=False, retry=False, name=None):
     """Stop an instance.
 
     """
-    self._RemoveConfigFile(instance.name)
+    if name is None:
+      name = instance.name
+    self._RemoveConfigFile(name)
     if force:
-      command = ["xm", "destroy", instance.name]
+      command = ["xm", "destroy", name]
     else:
-      command = ["xm", "shutdown", instance.name]
+      command = ["xm", "shutdown", name]
     result = utils.RunCmd(command)
 
     if result.failed:
       raise errors.HypervisorError("Failed to stop instance %s: %s, %s" %
-                                   (instance.name, result.fail_reason,
-                                    result.output))
+                                   (name, result.fail_reason, result.output))
 
   def RebootInstance(self, instance):
     """Reboot an instance.
 
     """
     ini_info = self.GetInstanceInfo(instance.name)
-    result = utils.RunCmd(["xm", "reboot", instance.name])
 
+    if ini_info is None:
+      raise errors.HypervisorError("Failed to reboot instance %s,"
+                                   " not running" % instance.name)
+
+    result = utils.RunCmd(["xm", "reboot", instance.name])
     if result.failed:
       raise errors.HypervisorError("Failed to reboot instance %s: %s, %s" %
                                    (instance.name, result.fail_reason,
                                     result.output))
-    done = False
-    retries = self.REBOOT_RETRY_COUNT
-    while retries > 0:
+
+    def _CheckInstance():
       new_info = self.GetInstanceInfo(instance.name)
-      # check if the domain ID has changed or the run time has
-      # decreased
-      if new_info[1] != ini_info[1] or new_info[5] < ini_info[5]:
-        done = True
-        break
-      time.sleep(self.REBOOT_RETRY_INTERVAL)
-      retries -= 1
 
-    if not done:
+      # check if the domain ID has changed or the run time has decreased
+      if (new_info is not None and
+          (new_info[1] != ini_info[1] or new_info[5] < ini_info[5])):
+        return
+
+      raise utils.RetryAgain()
+
+    try:
+      utils.Retry(_CheckInstance, self.REBOOT_RETRY_INTERVAL,
+                  self.REBOOT_RETRY_INTERVAL * self.REBOOT_RETRY_COUNT)
+    except utils.RetryTimeout:
       raise errors.HypervisorError("Failed to reboot instance %s: instance"
                                    " did not reboot in the expected interval" %
                                    (instance.name, ))
@@ -274,12 +295,15 @@ class XenHypervisor(hv_base.BaseHypervisor):
     return result
 
   @classmethod
-  def GetShellCommandForConsole(cls, instance, hvparams, beparams):
+  def GetInstanceConsole(cls, instance, hvparams, beparams):
-    """Return a command for connecting to the console of an instance.
+    """Return information for connecting to the console of an instance.
 
     """
-    return "xm console %s" % instance.name
-
+    return objects.InstanceConsole(instance=instance.name,
+                                   kind=constants.CONS_SSH,
+                                   host=instance.primary_node,
+                                   user=constants.GANETI_RUNAS,
+                                   command=["xm", "console", instance.name])
 
   def Verify(self):
     """Verify the hypervisor.
@@ -292,16 +316,17 @@ class XenHypervisor(hv_base.BaseHypervisor):
       return "'xm info' failed: %s, %s" % (result.fail_reason, result.output)
 
   @staticmethod
-  def _GetConfigFileDiskData(disk_template, block_devices):
+  def _GetConfigFileDiskData(block_devices, blockdev_prefix):
     """Get disk directive for xen config file.
 
     This method builds the xen config disk directive according to the
-    given disk_template and block_devices.
+    given block_devices and blockdev_prefix.
 
-    @param disk_template: string containing instance disk template
     @param block_devices: list of tuples (cfdev, rldev):
         - cfdev: dict containing ganeti config disk part
         - rldev: ganeti.bdev.BlockDev object
+    @param blockdev_prefix: a string containing blockdevice prefix,
+                            e.g. "sd" for /dev/sda
 
     @return: string containing disk directive for xen instance config file
 
@@ -314,9 +339,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
     if len(block_devices) > 24:
-      # 'z' - 'a' = 24
+      # only 24 device names (suffixes 'a' through 'x') are generated below
       raise errors.HypervisorError("Too many disks")
-    # FIXME: instead of this hardcoding here, each of PVM/HVM should
-    # directly export their info (currently HVM will just sed this info)
-    namespace = ["sd" + chr(i + ord('a')) for i in range(24)]
+    namespace = [blockdev_prefix + chr(i + ord('a')) for i in range(24)]
     for sd_name, (cfdev, dev_path) in zip(namespace, block_devices):
       if cfdev.mode == constants.DISK_RDWR:
         mode = "w"
@@ -362,7 +385,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
     We do nothing on a failure, as we did not change anything at accept time.
 
     @type instance: L{objects.Instance}
-    @param instance: instance whose migration is being aborted
+    @param instance: instance whose migration is being finalized
     @type info: string
     @param info: content of the xen config file on the source node
     @type success: boolean
@@ -378,27 +401,34 @@ class XenHypervisor(hv_base.BaseHypervisor):
     The migration will not be attempted if the instance is not
     currently running.
 
-    @type instance: string
-    @param instance: instance name
+    @type instance: L{objects.Instance}
+    @param instance: the instance to be migrated
     @type target: string
     @param target: ip address of the target node
     @type live: boolean
     @param live: perform a live migration
 
     """
-    if self.GetInstanceInfo(instance) is None:
+    if self.GetInstanceInfo(instance.name) is None:
       raise errors.HypervisorError("Instance not running, cannot migrate")
-    args = ["xm", "migrate"]
+
+    port = instance.hvparams[constants.HV_MIGRATION_PORT]
+
+    if not netutils.TcpPing(target, port, live_port_needed=True):
+      raise errors.HypervisorError("Remote host %s not listening on port"
+                                   " %s, cannot migrate" % (target, port))
+
+    args = ["xm", "migrate", "-p", "%d" % port]
     if live:
       args.append("-l")
-    args.extend([instance, target])
+    args.extend([instance.name, target])
     result = utils.RunCmd(args)
     if result.failed:
       raise errors.HypervisorError("Failed to migrate instance %s: %s" %
-                                   (instance, result.output))
+                                   (instance.name, result.output))
     # remove old xen file after migration succeeded
     try:
-      self._RemoveConfigFile(instance)
+      self._RemoveConfigFile(instance.name)
     except EnvironmentError:
       logging.exception("Failure while removing instance config file")
 
@@ -431,6 +461,10 @@ class XenPvmHypervisor(XenHypervisor):
     constants.HV_INITRD_PATH: hv_base.OPT_FILE_CHECK,
     constants.HV_ROOT_PATH: hv_base.REQUIRED_CHECK,
     constants.HV_KERNEL_ARGS: hv_base.NO_CHECK,
+    constants.HV_MIGRATION_PORT: hv_base.NET_PORT_CHECK,
+    constants.HV_MIGRATION_MODE: hv_base.MIGRATION_MODE_CHECK,
+    # TODO: Add a check for the blockdev prefix (matching [a-z:] or similar).
+    constants.HV_BLOCKDEV_PREFIX: hv_base.NO_CHECK,
     }
 
   @classmethod
@@ -477,14 +511,15 @@ class XenPvmHypervisor(XenHypervisor):
       ip = getattr(nic, "ip", None)
       if ip is not None:
         nic_str += ", ip=%s" % ip
-      vif_data.append("'%s'" % nic_str)
       if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
         nic_str += ", bridge=%s" % nic.nicparams[constants.NIC_LINK]
+      vif_data.append("'%s'" % nic_str)
+
+    disk_data = cls._GetConfigFileDiskData(block_devices,
+                                           hvp[constants.HV_BLOCKDEV_PREFIX])
 
     config.write("vif = [%s]\n" % ",".join(vif_data))
-    config.write("disk = [%s]\n" % ",".join(
-                 cls._GetConfigFileDiskData(instance.disk_template,
-                                            block_devices)))
+    config.write("disk = [%s]\n" % ",".join(disk_data))
 
     config.write("root = '%s'\n" % hvp[constants.HV_ROOT_PATH])
     config.write("on_poweroff = 'destroy'\n")
@@ -523,11 +558,16 @@ class XenHvmHypervisor(XenHypervisor):
       hv_base.ParamInSet(True, constants.HT_HVM_VALID_NIC_TYPES),
     constants.HV_PAE: hv_base.NO_CHECK,
     constants.HV_VNC_BIND_ADDRESS:
-      (False, utils.IsValidIP,
+      (False, netutils.IP4Address.IsValid,
        "VNC bind address is not a valid IP address", None, None),
     constants.HV_KERNEL_PATH: hv_base.REQ_FILE_CHECK,
     constants.HV_DEVICE_MODEL: hv_base.REQ_FILE_CHECK,
     constants.HV_VNC_PASSWORD_FILE: hv_base.REQ_FILE_CHECK,
+    constants.HV_MIGRATION_PORT: hv_base.NET_PORT_CHECK,
+    constants.HV_MIGRATION_MODE: hv_base.MIGRATION_MODE_CHECK,
+    constants.HV_USE_LOCALTIME: hv_base.NO_CHECK,
+    # TODO: Add a check for the blockdev prefix (matching [a-z:] or similar).
+    constants.HV_BLOCKDEV_PREFIX: hv_base.NO_CHECK,
     }
 
   @classmethod
@@ -586,7 +626,8 @@ class XenHvmHypervisor(XenHypervisor):
     config.write("vncpasswd = '%s'\n" % password.rstrip())
 
     config.write("serial = 'pty'\n")
-    config.write("localtime = 1\n")
+    if hvp[constants.HV_USE_LOCALTIME]:
+      config.write("localtime = 1\n")
 
     vif_data = []
     nic_type = hvp[constants.HV_NIC_TYPE]
@@ -602,19 +643,15 @@ class XenHvmHypervisor(XenHypervisor):
       ip = getattr(nic, "ip", None)
       if ip is not None:
         nic_str += ", ip=%s" % ip
-      vif_data.append("'%s'" % nic_str)
       if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
         nic_str += ", bridge=%s" % nic.nicparams[constants.NIC_LINK]
+      vif_data.append("'%s'" % nic_str)
 
     config.write("vif = [%s]\n" % ",".join(vif_data))
-    disk_data = cls._GetConfigFileDiskData(instance.disk_template,
-                                            block_devices)
-    disk_type = hvp[constants.HV_DISK_TYPE]
-    if disk_type in (None, constants.HT_DISK_IOEMU):
-      replacement = ",ioemu:hd"
-    else:
-      replacement = ",hd"
-    disk_data = [line.replace(",sd", replacement) for line in disk_data]
+
+    disk_data = cls._GetConfigFileDiskData(block_devices,
+                                           hvp[constants.HV_BLOCKDEV_PREFIX])
+
     iso_path = hvp[constants.HV_CDROM_IMAGE_PATH]
     if iso_path:
       iso = "'file:%s,hdc:cdrom,r'" % iso_path