from ganeti import serializer
from ganeti import netutils
from ganeti import runtime
-from ganeti import mcpu
from ganeti import compat
from ganeti import pathutils
from ganeti import vcluster
from ganeti import ht
+from ganeti import hooksmaster
_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
"""
-def GetInstReasonFilename(instance_name):
+def _GetInstReasonFilename(instance_name):
"""Path of the file containing the reason of the instance status change.

  @type instance_name: string
  @param instance_name: The name of the instance
  @rtype: string
  @return: The path of the file

  """
  return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)
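# Hedged usage sketch (the instance name is illustrative): the helper simply
# joins the configured reason directory with the instance name, e.g.
#
#   _GetInstReasonFilename("instance1.example.com")
#   # -> utils.PathJoin(pathutils.INSTANCE_REASON_DIR, "instance1.example.com")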
-class InstReason(object):
- """Class representing the reason for a change of state of a VM.
+def _StoreInstReasonTrail(instance_name, trail):
+ """Serialize a reason trail related to an instance change of state to file.
- It is used to allow an easy serialization of the reason, so that it can be
- written on a file.
+ The exact location of the file depends on the name of the instance and on
+ the configuration of the Ganeti cluster defined at deploy time.
- """
- def __init__(self, source, text):
- """Initialize the class with all the required values.
-
- @type text: string
- @param text: The textual description of the reason for changing state
- @type source: string
- @param source: The source of the state change (RAPI, CLI, ...)
-
- """
- self.source = source
- self.text = text
-
- def GetJson(self):
- """Get the JSON representation of the InstReason.
-
- @rtype: string
- @return : The JSON representation of the object
-
- """
- return serializer.DumpJson(dict(source=self.source, text=self.text))
-
- def Store(self, instance_name):
- """Serialize on a file the reason for the last state change of an instance.
-
- The exact location of the file depends on the name of the instance and on
- the configuration of the Ganeti cluster defined at deploy time.
-
- @type instance_name: string
- @param instance_name: The name of the instance
- @rtype: None
+  @type instance_name: string
+  @param instance_name: The name of the instance
+  @type trail: list of reasons
+  @param trail: reason trail of the state change
+  @rtype: None
- """
- filename = GetInstReasonFilename(instance_name)
- utils.WriteFile(filename, data=self.GetJson())
+ """
+ json = serializer.DumpJson(trail)
+ filename = _GetInstReasonFilename(instance_name)
+ utils.WriteFile(filename, data=json)
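# A minimal sketch of the on-disk result, assuming the usual Ganeti reason
# trail shape of (source, reason, timestamp) entries; the values below are
# illustrative, not part of the patch:
#
#   trail = [("gnt:opcode:op_instance_startup", "scheduled restart",
#             1411634919928832000)]
#   _StoreInstReasonTrail("instance1.example.com", trail)
#
# writes the JSON document
#   [["gnt:opcode:op_instance_startup", "scheduled restart",
#     1411634919928832000]]
# to the file returned by _GetInstReasonFilename("instance1.example.com").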
def _Fail(msg, *args, **kwargs):
cfg = _GetConfig()
hr = HooksRunner()
- hm = mcpu.HooksMaster(hook_opcode, hooks_path, nodes, hr.RunLocalHooks,
- None, env_fn, logging.warning, cfg.GetClusterName(),
- cfg.GetMasterNode())
-
+ hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes,
+ hr.RunLocalHooks, None, env_fn,
+ logging.warning, cfg.GetClusterName(),
+ cfg.GetMasterNode())
hm.RunPhase(constants.HOOKS_PHASE_PRE)
result = fn(*args, **kwargs)
hm.RunPhase(constants.HOOKS_PHASE_POST)
return block_devices
-def StartInstance(instance, startup_paused):
+def StartInstance(instance, startup_paused, reason, store_reason=True):
"""Start an instance.
@type instance: L{objects.Instance}
@param instance: the instance object
@type startup_paused: bool
  @param startup_paused: pause instance at startup?
+ @type reason: list of reasons
+ @param reason: the reason trail for this startup
+ @type store_reason: boolean
+  @param store_reason: whether to store the startup reason trail on file
@rtype: None
"""
block_devices = _GatherAndLinkBlockDevs(instance)
hyper = hypervisor.GetHypervisor(instance.hypervisor)
hyper.StartInstance(instance, block_devices, startup_paused)
+ if store_reason:
+ _StoreInstReasonTrail(instance.name, reason)
except errors.BlockDeviceError, err:
_Fail("Block device error: %s", err, exc=True)
except errors.HypervisorError, err:
_Fail("Hypervisor error: %s", err, exc=True)
-def InstanceShutdown(instance, timeout):
+def InstanceShutdown(instance, timeout, reason, store_reason=True):
"""Shut an instance down.
  @note: this function uses polling with a hardcoded timeout.
@param instance: the instance object
@type timeout: integer
@param timeout: maximum timeout for soft shutdown
+ @type reason: list of reasons
+ @param reason: the reason trail for this shutdown
+ @type store_reason: boolean
+ @param store_reason: whether to store the shutdown reason trail on file
@rtype: None
"""
try:
hyper.StopInstance(instance, retry=self.tried_once)
+ if store_reason:
+ _StoreInstReasonTrail(instance.name, reason)
except errors.HypervisorError, err:
if iname not in hyper.ListInstances():
      # if the instance is no longer existing, consider this a
      # success and go to cleanup
instance (instead of a call_instance_reboot RPC)
@type shutdown_timeout: integer
@param shutdown_timeout: maximum timeout for soft shutdown
+ @type reason: list of reasons
+ @param reason: the reason trail for this reboot
@rtype: None
"""
if reboot_type == constants.INSTANCE_REBOOT_SOFT:
try:
hyper.RebootInstance(instance)
-      reason.Store(instance.name)
+      _StoreInstReasonTrail(instance.name, reason)
except errors.HypervisorError, err:
_Fail("Failed to soft reboot instance %s: %s", instance.name, err)
elif reboot_type == constants.INSTANCE_REBOOT_HARD:
try:
- InstanceShutdown(instance, shutdown_timeout)
- result = StartInstance(instance, False)
- reason.Store(instance.name)
+ InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False)
+ result = StartInstance(instance, False, reason, store_reason=False)
+ _StoreInstReasonTrail(instance.name, reason)
return result
except errors.HypervisorError, err:
_Fail("Failed to hard reboot instance %s: %s", instance.name, err)
_Fail("Failed to get migration status: %s", err, exc=True)
+def HotplugDevice(instance, action, dev_type, device, extra, seq):
+ """Hotplug a device
+
+ Hotplug is currently supported only for KVM Hypervisor.
+ @type instance: L{objects.Instance}
+ @param instance: the instance to which we hotplug a device
+ @type action: string
+ @param action: the hotplug action to perform
+ @type dev_type: string
+ @param dev_type: the device type to hotplug
+ @type device: either L{objects.NIC} or L{objects.Disk}
+ @param device: the device object to hotplug
+ @type extra: string
+ @param extra: extra info used by hotplug code (e.g. disk link)
+ @type seq: int
+ @param seq: the index of the device from master perspective
+  @raise RPCFail: in case the instance does not use the KVM hypervisor
+
+ """
+ hyper = hypervisor.GetHypervisor(instance.hypervisor)
+ try:
+ hyper.VerifyHotplugSupport(instance, action, dev_type)
+ except errors.HotplugError, err:
+ _Fail("Hotplug is not supported: %s", err)
+
+ if action == constants.HOTPLUG_ACTION_ADD:
+ fn = hyper.HotAddDevice
+ elif action == constants.HOTPLUG_ACTION_REMOVE:
+ fn = hyper.HotDelDevice
+ elif action == constants.HOTPLUG_ACTION_MODIFY:
+ fn = hyper.HotModDevice
+  else:
+    # the assert rejects unknown actions; _Fail covers an action that is
+    # valid but not implemented here, instead of crashing on an unbound fn
+    assert action in constants.HOTPLUG_ALL_ACTIONS
+    _Fail("Hotplug action '%s' is not implemented", action)
+
+ return fn(instance, dev_type, device, extra, seq)
+
+
+def HotplugSupported(instance):
+ """Checks if hotplug is generally supported.
+
+ """
+ hyper = hypervisor.GetHypervisor(instance.hypervisor)
+ try:
+ hyper.HotplugSupported(instance)
+ except errors.HotplugError, err:
+ _Fail("Hotplug is not supported: %s", err)
+
+
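# Hedged sketch of an RPC-layer call into the new entry point; the NIC
# fields and the target constant below are illustrative assumptions:
#
#   nic = objects.NIC(mac="aa:00:00:fa:3a:3f", ip=None,
#                     nicparams={constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
#                                constants.NIC_LINK: "br0"})
#   HotplugDevice(instance, constants.HOTPLUG_ACTION_ADD,
#                 constants.HOTPLUG_TARGET_NIC, nic, None, 1)
#
# On a non-KVM instance, VerifyHotplugSupport raises HotplugError, which
# HotplugDevice converts into an RPC failure via _Fail.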
def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
"""Creates a block device for an instance.
rdev = None
if rdev is not None:
r_path = rdev.dev_path
- try:
- rdev.Remove()
- except errors.BlockDeviceError, err:
- msgs.append(str(err))
+
+ def _TryRemove():
+ try:
+ rdev.Remove()
+ return []
+ except errors.BlockDeviceError, err:
+ return [str(err)]
+
+ msgs.extend(utils.SimpleRetry([], _TryRemove,
+ constants.DISK_REMOVE_RETRY_INTERVAL,
+ constants.DISK_REMOVE_RETRY_TIMEOUT))
+
if not msgs:
DevCacheManager.RemoveCache(r_path)
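# For orientation, utils.SimpleRetry(expected, fn, delay, timeout) re-runs
# fn, sleeping `delay` seconds between attempts, until the result equals
# `expected` ([] here, i.e. a clean removal) or `timeout` expires, and
# returns the last result either way, so any remaining error strings end up
# in msgs. A hedged standalone illustration (_device_gone is illustrative):
#
#   def _try_once():
#     return [] if _device_gone() else ["device still busy"]
#   leftover = utils.SimpleRetry([], _try_once, 0.5, 10)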
This is a wrapper over _RecursiveAssembleBD.
-  @rtype: str or boolean
+  @rtype: tuple
- @return: a C{/dev/...} path for primary nodes, and
- C{True} for secondary nodes
+ @return: a tuple with the C{/dev/...} path and the created symlink
+ for primary nodes, and (C{True}, C{True}) for secondary nodes
"""
try:
result = _RecursiveAssembleBD(disk, owner, as_primary)
if isinstance(result, bdev.BlockDev):
# pylint: disable=E1103
- result = result.dev_path
+ dev_path = result.dev_path
+ link_name = None
if as_primary:
- _SymlinkBlockDev(owner, result, idx)
+ link_name = _SymlinkBlockDev(owner, dev_path, idx)
+ elif result:
+ return result, result
+ else:
+ _Fail("Unexpected result from _RecursiveAssembleBD")
except errors.BlockDeviceError, err:
_Fail("Error while assembling disk: %s", err, exc=True)
except OSError, err:
_Fail("Error while symlinking disk: %s", err, exc=True)
- return result
+ return dev_path, link_name
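# The assemble wrapper now returns a pair; a hedged caller sketch, assuming
# this is backend's BlockdevAssemble (argument names are illustrative):
#
#   dev_path, link_name = BlockdevAssemble(disk, owner, True, idx)
#
# On a primary node this yields the /dev/... path plus the freshly created
# symlink; on a secondary node both elements are True, preserving the old
# boolean contract for that case.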
def BlockdevShutdown(disk):
real_disk = _OpenRealBD(disk)
result["DISK_%d_PATH" % idx] = real_disk.dev_path
result["DISK_%d_ACCESS" % idx] = disk.mode
+ result["DISK_%d_UUID" % idx] = disk.uuid
+ if disk.name:
+ result["DISK_%d_NAME" % idx] = disk.name
if constants.HV_DISK_TYPE in instance.hvparams:
result["DISK_%d_FRONTEND_TYPE" % idx] = \
instance.hvparams[constants.HV_DISK_TYPE]
# NICs
for idx, nic in enumerate(instance.nics):
result["NIC_%d_MAC" % idx] = nic.mac
+ result["NIC_%d_UUID" % idx] = nic.uuid
+ if nic.name:
+ result["NIC_%d_NAME" % idx] = nic.name
if nic.ip:
result["NIC_%d_IP" % idx] = nic.ip
result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE]
for rd in bdevs:
stats = rd.GetProcStatus()
- all_connected = (all_connected and
- (stats.is_connected or stats.is_in_resync))
+ if multimaster:
+ # In the multimaster case we have to wait explicitly until
+ # the resource is Connected and UpToDate/UpToDate, because
+ # we promote *both nodes* to primary directly afterwards.
+ # Being in resync is not enough, since there is a race during which we
+ # may promote a node with an Outdated disk to primary, effectively
+ # tearing down the connection.
+ all_connected = (all_connected and
+ stats.is_connected and
+ stats.is_disk_uptodate and
+ stats.peer_disk_uptodate)
+ else:
+ all_connected = (all_connected and
+ (stats.is_connected or stats.is_in_resync))
if stats.is_standalone:
      # peer had different config info and this node became
      # standalone, even though this should not happen with the
      # new staged way of changing disk configs