bootstrap: Wait for node daemon when adding new node
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 93abd9c..afffbfb 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -95,6 +95,10 @@ class LogicalUnit(object):
     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
     # support for dry-run
     self.dry_run_result = None
+    # support for generic debug attribute
+    if (not hasattr(self.op, "debug_level") or
+        not isinstance(self.op.debug_level, int)):
+      self.op.debug_level = 0
 
     # Tasklets
     self.tasklets = None
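
The hunk above guards against opcodes that predate the new attribute: debug_level is forced to an integer default when it is missing or has the wrong type. A minimal standalone sketch of that defaulting pattern (class and function names here are illustrative, not Ganeti's):

    class Op(object):
      """Stand-in for an opcode object (hypothetical)."""

    def _EnsureIntAttr(op, name, default=0):
      # Accept the attribute only if it exists and has the right type;
      # otherwise fall back to a safe default, as in the hunk above.
      if not isinstance(getattr(op, name, None), int):
        setattr(op, name, default)

    op = Op()
    _EnsureIntAttr(op, "debug_level")
    assert op.debug_level == 0
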
@@ -300,12 +304,9 @@ class LogicalUnit(object):
     else:
       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
         "_ExpandAndLockInstance called with instance-level locks set"
-    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
-    if expanded_name is None:
-      raise errors.OpPrereqError("Instance '%s' not known" %
-                                 self.op.instance_name, errors.ECODE_NOENT)
-    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
-    self.op.instance_name = expanded_name
+    self.op.instance_name = _ExpandInstanceName(self.cfg,
+                                                self.op.instance_name)
+    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 
   def _LockInstancesNodes(self, primary_only=False):
     """Helper function to declare instances' nodes for locking.
@@ -427,7 +428,7 @@ def _GetWantedNodes(lu, nodes):
   @param nodes: list of node names or None for all nodes
   @rtype: list
   @return: the list of nodes, sorted
-  @raise errors.OpProgrammerError: if the nodes parameter is wrong type
+  @raise errors.ProgrammerError: if the nodes parameter is wrong type
 
   """
   if not isinstance(nodes, list):
@@ -438,14 +439,7 @@ def _GetWantedNodes(lu, nodes):
     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
       " non-empty list of nodes whose name is to be expanded.")
 
-  wanted = []
-  for name in nodes:
-    node = lu.cfg.ExpandNodeName(name)
-    if node is None:
-      raise errors.OpPrereqError("No such node name '%s'" % name,
-                                 errors.ECODE_NOENT)
-    wanted.append(node)
-
+  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
   return utils.NiceSort(wanted)
 
 
@@ -467,15 +461,7 @@ def _GetWantedInstances(lu, instances):
                                errors.ECODE_INVAL)
 
   if instances:
-    wanted = []
-
-    for name in instances:
-      instance = lu.cfg.ExpandInstanceName(name)
-      if instance is None:
-        raise errors.OpPrereqError("No such instance name '%s'" % name,
-                                   errors.ECODE_NOENT)
-      wanted.append(instance)
-
+    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
   else:
     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
   return wanted
@@ -555,6 +541,33 @@ def _CheckNodeNotDrained(lu, node):
                                errors.ECODE_INVAL)
 
 
+def _ExpandItemName(fn, name, kind):
+  """Expand an item name.
+
+  @param fn: the function to use for expansion
+  @param name: requested item name
+  @param kind: text description ('Node' or 'Instance')
+  @return: the resolved (full) name
+  @raise errors.OpPrereqError: if the item is not found
+
+  """
+  full_name = fn(name)
+  if full_name is None:
+    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
+                               errors.ECODE_NOENT)
+  return full_name
+
+
+def _ExpandNodeName(cfg, name):
+  """Wrapper over L{_ExpandItemName} for nodes."""
+  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
+
+
+def _ExpandInstanceName(cfg, name):
+  """Wrapper over L{_ExpandItemName} for instance."""
+  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
+
+
 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                           memory, vcpus, nics, disk_template, disks,
                           bep, hvp, hypervisor_name):
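
These helpers centralize a check that was previously copy-pasted at every call site: the config's expand functions return None for unknown names, and the wrapper turns that into a typed OpPrereqError. A self-contained sketch of the same pattern (NotFoundError and the dict-backed resolver are stand-ins, not Ganeti code):

    class NotFoundError(Exception):
      pass

    def expand_item_name(fn, name, kind):
      # fn returns the canonical (full) name, or None if unknown
      full_name = fn(name)
      if full_name is None:
        raise NotFoundError("%s '%s' not known" % (kind, name))
      return full_name

    nodes = {"node1": "node1.example.com"}
    expand_item_name(nodes.get, "node1", "Node")  # -> "node1.example.com"
    expand_item_name(nodes.get, "ghost", "Node")  # raises NotFoundError

Call sites then shrink to one-liners, as in the _GetWantedNodes hunk above, where the old loop becomes a list comprehension over _ExpandNodeName.
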
@@ -1603,8 +1616,8 @@ class LUVerifyCluster(LogicalUnit):
         test = msg and not res.offline
         self._ErrorIf(test, self.ENODEHOOKS, node_name,
                       "Communication failure in hooks execution: %s", msg)
-        if res.offline:
-          # No need to investigate payload if node is offline.
+        if res.offline or msg:
+          # No need to investigate payload if node is offline or gave an error.
           # override manually lu_result here as _ErrorIf only
           # overrides self.bad
           lu_result = 1
@@ -1719,10 +1732,7 @@ class LURepairDiskSizes(NoHooksLU):
     if self.op.instances:
       self.wanted_names = []
       for name in self.op.instances:
-        full_name = self.cfg.ExpandInstanceName(name)
-        if full_name is None:
-          raise errors.OpPrereqError("Instance '%s' not known" % name,
-                                     errors.ECODE_NOENT)
+        full_name = _ExpandInstanceName(self.cfg, name)
         self.wanted_names.append(full_name)
       self.needed_locks = {
         locking.LEVEL_NODE: [],
@@ -2425,8 +2435,11 @@ class LURemoveNode(LogicalUnit):
       "NODE_NAME": self.op.node_name,
       }
     all_nodes = self.cfg.GetNodeList()
-    if self.op.node_name in all_nodes:
+    try:
       all_nodes.remove(self.op.node_name)
+    except ValueError:
+      logging.warning("Node %s which is about to be removed not found"
+                      " in the all nodes list", self.op.node_name)
     return env, all_nodes, all_nodes
 
   def CheckPrereq(self):
@@ -2440,10 +2453,9 @@ class LURemoveNode(LogicalUnit):
     Any errors are signaled by raising errors.OpPrereqError.
 
     """
-    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
-    if node is None:
-      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name,
-                                 errors.ECODE_NOENT)
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+    node = self.cfg.GetNodeInfo(self.op.node_name)
+    assert node is not None
 
     instance_list = self.cfg.GetInstanceList()
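
The removal above switches from a membership test to the EAFP idiom: attempt list.remove and handle the ValueError, logging a warning instead of silently skipping. A minimal sketch:

    import logging

    def remove_known_node(all_nodes, node_name):
      # EAFP: one traversal instead of an "in" check followed by remove()
      try:
        all_nodes.remove(node_name)
      except ValueError:
        logging.warning("Node %s not found in the list of all nodes",
                        node_name)
      return all_nodes
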
 
@@ -2843,12 +2855,7 @@ class LUModifyNodeStorage(NoHooksLU):
   REQ_BGL = False
 
   def CheckArguments(self):
-    node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if node_name is None:
-      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
-                                 errors.ECODE_NOENT)
-
-    self.op.node_name = node_name
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
 
     storage_type = self.op.storage_type
     if storage_type not in constants.VALID_STORAGE_TYPES:
@@ -3122,11 +3129,7 @@ class LUSetNodeParams(LogicalUnit):
   REQ_BGL = False
 
   def CheckArguments(self):
-    node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if node_name is None:
-      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
-                                 errors.ECODE_INVAL)
-    self.op.node_name = node_name
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
     _CheckBooleanOpField(self.op, 'master_candidate')
     _CheckBooleanOpField(self.op, 'offline')
     _CheckBooleanOpField(self.op, 'drained')
@@ -3275,12 +3278,8 @@ class LUPowercycleNode(NoHooksLU):
   REQ_BGL = False
 
   def CheckArguments(self):
-    node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if node_name is None:
-      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
-                                 errors.ECODE_NOENT)
-    self.op.node_name = node_name
-    if node_name == self.cfg.GetMasterNode() and not self.op.force:
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
       raise errors.OpPrereqError("The node is the master and the force"
                                  " parameter was not set",
                                  errors.ECODE_INVAL)
@@ -3962,14 +3961,10 @@ class LUReinstallInstance(LogicalUnit):
     self.op.force_variant = getattr(self.op, "force_variant", False)
     if self.op.os_type is not None:
       # OS verification
-      pnode = self.cfg.GetNodeInfo(
-        self.cfg.ExpandNodeName(instance.primary_node))
-      if pnode is None:
-        raise errors.OpPrereqError("Primary node '%s' is unknown" %
-                                   self.op.pnode, errors.ECODE_NOENT)
-      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
+      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
+      result = self.rpc.call_os_get(pnode, self.op.os_type)
       result.Raise("OS '%s' not in supported OS list for primary node %s" %
-                   (self.op.os_type, pnode.name),
+                   (self.op.os_type, pnode),
                    prereq=True, ecode=errors.ECODE_INVAL)
       if not self.op.force_variant:
         _CheckOSVariant(result.payload, self.op.os_type)
@@ -3990,7 +3985,9 @@ class LUReinstallInstance(LogicalUnit):
     _StartInstanceDisks(self, inst, None)
     try:
       feedback_fn("Running the instance OS create scripts...")
-      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
+      # FIXME: pass debug option from opcode to backend
+      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
+                                             self.op.debug_level)
       result.Raise("Could not install OS for instance %s on node %s" %
                    (inst.name, inst.primary_node))
     finally:
@@ -4106,11 +4103,10 @@ class LURenameInstance(LogicalUnit):
     This checks that the instance is in the cluster and is not running.
 
     """
-    instance = self.cfg.GetInstanceInfo(
-      self.cfg.ExpandInstanceName(self.op.instance_name))
-    if instance is None:
-      raise errors.OpPrereqError("Instance '%s' not known" %
-                                 self.op.instance_name, errors.ECODE_NOENT)
+    self.op.instance_name = _ExpandInstanceName(self.cfg,
+                                                self.op.instance_name)
+    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+    assert instance is not None
     _CheckNodeOnline(self, instance.primary_node)
 
     if instance.admin_up:
@@ -4174,7 +4170,7 @@ class LURenameInstance(LogicalUnit):
     _StartInstanceDisks(self, inst, None)
     try:
       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
-                                                 old_name)
+                                                 old_name, self.op.debug_level)
       msg = result.fail_msg
       if msg:
         msg = ("Could not run OS rename script for instance %s on node %s"
@@ -4219,7 +4215,8 @@ class LURemoveInstance(LogicalUnit):
     env = _BuildInstanceHookEnvByObject(self, self.instance)
     env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
     nl = [self.cfg.GetMasterNode()]
-    return env, nl, nl
+    nl_post = list(self.instance.all_nodes) + nl
+    return env, nl, nl_post
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -4578,13 +4575,22 @@ class LUFailoverInstance(LogicalUnit):
     This runs on master, primary and secondary nodes of the instance.
 
     """
+    instance = self.instance
+    source_node = instance.primary_node
+    target_node = instance.secondary_nodes[0]
     env = {
       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
       "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
+      "OLD_PRIMARY": source_node,
+      "OLD_SECONDARY": target_node,
+      "NEW_PRIMARY": target_node,
+      "NEW_SECONDARY": source_node,
       }
-    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
-    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
-    return env, nl, nl
+    env.update(_BuildInstanceHookEnvByObject(self, instance))
+    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
+    nl_post = list(nl)
+    nl_post.append(source_node)
+    return env, nl, nl_post
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -4726,11 +4732,21 @@ class LUMigrateInstance(LogicalUnit):
 
     """
     instance = self._migrater.instance
+    source_node = instance.primary_node
+    target_node = instance.secondary_nodes[0]
     env = _BuildInstanceHookEnvByObject(self, instance)
     env["MIGRATE_LIVE"] = self.op.live
     env["MIGRATE_CLEANUP"] = self.op.cleanup
+    env.update({
+        "OLD_PRIMARY": source_node,
+        "OLD_SECONDARY": target_node,
+        "NEW_PRIMARY": target_node,
+        "NEW_SECONDARY": source_node,
+        })
     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
-    return env, nl, nl
+    nl_post = list(nl)
+    nl_post.append(source_node)
+    return env, nl, nl_post
 
 
 class LUMoveInstance(LogicalUnit):
@@ -4751,10 +4767,7 @@ class LUMoveInstance(LogicalUnit):
 
   def ExpandNames(self):
     self._ExpandAndLockInstance()
-    target_node = self.cfg.ExpandNodeName(self.op.target_node)
-    if target_node is None:
-      raise errors.OpPrereqError("Node '%s' not known" %
-                                  self.op.target_node, errors.ECODE_NOENT)
+    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
     self.op.target_node = target_node
     self.needed_locks[locking.LEVEL_NODE] = [target_node]
     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
@@ -4928,10 +4941,7 @@ class LUMigrateNode(LogicalUnit):
   REQ_BGL = False
 
   def ExpandNames(self):
-    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if self.op.node_name is None:
-      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
-                                 errors.ECODE_NOENT)
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
 
     self.needed_locks = {
       locking.LEVEL_NODE: [self.op.node_name],
@@ -4991,11 +5001,9 @@ class TLMigrateInstance(Tasklet):
     This checks that the instance is in the cluster.
 
     """
-    instance = self.cfg.GetInstanceInfo(
-      self.cfg.ExpandInstanceName(self.instance_name))
-    if instance is None:
-      raise errors.OpPrereqError("Instance '%s' not known" %
-                                 self.instance_name, errors.ECODE_NOENT)
+    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
+    instance = self.cfg.GetInstanceInfo(instance_name)
+    assert instance is not None
 
     if instance.disk_template != constants.DT_DRBD8:
       raise errors.OpPrereqError("Instance's disk layout is not"
@@ -5651,15 +5659,6 @@ class LUCreateInstance(LogicalUnit):
       raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                  errors.ECODE_INVAL)
 
-  def _ExpandNode(self, node):
-    """Expands and checks one node name.
-
-    """
-    node_full = self.cfg.ExpandNodeName(node)
-    if node_full is None:
-      raise errors.OpPrereqError("Unknown node %s" % node, errors.ECODE_NOENT)
-    return node_full
-
   def ExpandNames(self):
     """ExpandNames for CreateInstance.
 
@@ -5815,7 +5814,7 @@ class LUCreateInstance(LogicalUnit):
         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
       try:
         size = int(size)
-      except ValueError:
+      except (TypeError, ValueError):
         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                    errors.ECODE_INVAL)
       self.disks.append({"size": size, "mode": mode})
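
Catching TypeError as well matters because int() raises ValueError for malformed strings but TypeError for non-string, non-number inputs such as None (e.g. a missing "size" value):

    for bad in ("10g", None):
      try:
        int(bad)
      except (TypeError, ValueError) as err:
        print("rejected %r: %s" % (bad, err))

The same fix is applied to the disk-size parsing in LUSetInstanceParams further down.
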
@@ -5839,10 +5838,10 @@ class LUCreateInstance(LogicalUnit):
     if self.op.iallocator:
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
     else:
-      self.op.pnode = self._ExpandNode(self.op.pnode)
+      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
       nodelist = [self.op.pnode]
       if self.op.snode is not None:
-        self.op.snode = self._ExpandNode(self.op.snode)
+        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
         nodelist.append(self.op.snode)
       self.needed_locks[locking.LEVEL_NODE] = nodelist
 
@@ -5862,7 +5861,7 @@ class LUCreateInstance(LogicalUnit):
                                      " path requires a source node option.",
                                      errors.ECODE_INVAL)
       else:
-        self.op.src_node = src_node = self._ExpandNode(src_node)
+        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
           self.needed_locks[locking.LEVEL_NODE].append(src_node)
         if not os.path.isabs(src_path):
@@ -6227,7 +6226,9 @@ class LUCreateInstance(LogicalUnit):
     if iobj.disk_template != constants.DT_DISKLESS:
       if self.op.mode == constants.INSTANCE_CREATE:
         feedback_fn("* running the instance OS create scripts...")
-        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
+        # FIXME: pass debug option from opcode to backend
+        result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
+                                               self.op.debug_level)
         result.Raise("Could not add os for instance %s"
                      " on node %s" % (instance, pnode_name))
 
@@ -6236,9 +6237,11 @@ class LUCreateInstance(LogicalUnit):
         src_node = self.op.src_node
         src_images = self.src_images
         cluster_name = self.cfg.GetClusterName()
+        # FIXME: pass debug option from opcode to backend
         import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                          src_node, src_images,
-                                                         cluster_name)
+                                                         cluster_name,
+                                                         self.op.debug_level)
         msg = import_result.fail_msg
         if msg:
           self.LogWarning("Error while importing the disk images for instance"
@@ -6326,6 +6329,8 @@ class LUReplaceDisks(LogicalUnit):
       self.op.remote_node = None
     if not hasattr(self.op, "iallocator"):
       self.op.iallocator = None
+    if not hasattr(self.op, "early_release"):
+      self.op.early_release = False
 
     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                   self.op.iallocator)
@@ -6337,11 +6342,7 @@ class LUReplaceDisks(LogicalUnit):
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
 
     elif self.op.remote_node is not None:
-      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
-      if remote_node is None:
-        raise errors.OpPrereqError("Node '%s' not known" %
-                                   self.op.remote_node, errors.ECODE_NOENT)
-
+      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
       self.op.remote_node = remote_node
 
       # Warning: do not remove the locking of the new secondary here
@@ -6357,7 +6358,7 @@ class LUReplaceDisks(LogicalUnit):
 
     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                    self.op.iallocator, self.op.remote_node,
-                                   self.op.disks)
+                                   self.op.disks, False, self.op.early_release)
 
     self.tasklets = [self.replacer]
 
@@ -6404,16 +6405,15 @@ class LUEvacuateNode(LogicalUnit):
       self.op.remote_node = None
     if not hasattr(self.op, "iallocator"):
       self.op.iallocator = None
+    if not hasattr(self.op, "early_release"):
+      self.op.early_release = False
 
     TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                   self.op.remote_node,
                                   self.op.iallocator)
 
   def ExpandNames(self):
-    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if self.op.node_name is None:
-      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
-                                 errors.ECODE_NOENT)
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
 
     self.needed_locks = {}
 
@@ -6422,18 +6422,13 @@ class LUEvacuateNode(LogicalUnit):
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
 
     elif self.op.remote_node is not None:
-      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
-      if remote_node is None:
-        raise errors.OpPrereqError("Node '%s' not known" %
-                                   self.op.remote_node, errors.ECODE_NOENT)
-
-      self.op.remote_node = remote_node
+      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
 
       # Warning: do not remove the locking of the new secondary here
       # unless DRBD8.AddChildren is changed to work in parallel;
       # currently it doesn't since parallel invocations of
       # FindUnusedMinor will conflict
-      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
+      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
 
     else:
@@ -6449,7 +6444,8 @@ class LUEvacuateNode(LogicalUnit):
       names.append(inst.name)
 
       replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
-                                self.op.iallocator, self.op.remote_node, [])
+                                self.op.iallocator, self.op.remote_node, [],
+                                True, self.op.early_release)
       tasklets.append(replacer)
 
     self.tasklets = tasklets
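
Note the contrast with LUReplaceDisks above: a plain disk replacement passes delay_iallocator=False, while node evacuation builds one tasklet per instance and passes True, so each tasklet's placement check runs only after the previous tasklets have updated the cluster model. Schematically (stand-in class, not the real tasklet):

    class Replacer(object):
      def __init__(self, instance, delay_iallocator, early_release):
        self.instance = instance
        self.delay_iallocator = delay_iallocator
        self.early_release = early_release

    # evacuation: one replacer per instance, placement deferred to Exec
    tasklets = [Replacer(name, True, False) for name in ("inst1", "inst2")]
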
@@ -6491,7 +6487,7 @@ class TLReplaceDisks(Tasklet):
 
   """
   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
-               disks):
+               disks, delay_iallocator, early_release):
     """Initializes this class.
 
     """
@@ -6503,6 +6499,8 @@ class TLReplaceDisks(Tasklet):
     self.iallocator_name = iallocator_name
     self.remote_node = remote_node
     self.disks = disks
+    self.delay_iallocator = delay_iallocator
+    self.early_release = early_release
 
     # Runtime data
     self.instance = None
@@ -6589,6 +6587,19 @@ class TLReplaceDisks(Tasklet):
                                  len(instance.secondary_nodes),
                                  errors.ECODE_FAULT)
 
+    if not self.delay_iallocator:
+      self._CheckPrereq2()
+
+  def _CheckPrereq2(self):
+    """Check prerequisites, second part.
+
+    This function should always be part of CheckPrereq. It was separated
+    out and is now called from Exec because, during node evacuation, the
+    iallocator would otherwise be invoked with an unmodified cluster model
+    that does not reflect the changes planned by earlier tasklets.
+
+    """
+    instance = self.instance
     secondary_node = instance.secondary_nodes[0]
 
     if self.iallocator_name is None:
@@ -6662,6 +6673,14 @@ class TLReplaceDisks(Tasklet):
 
         _CheckNodeNotDrained(self.lu, remote_node)
 
+        old_node_info = self.cfg.GetNodeInfo(secondary_node)
+        assert old_node_info is not None
+        if old_node_info.offline and not self.early_release:
+          # doesn't make sense to delay the release
+          self.early_release = True
+          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
+                          " early-release mode", secondary_node)
+
       else:
         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                      self.mode)
@@ -6692,6 +6711,9 @@ class TLReplaceDisks(Tasklet):
     This dispatches the disk replacement to the appropriate handler.
 
     """
+    if self.delay_iallocator:
+      self._CheckPrereq2()
+
     if not self.disks:
       feedback_fn("No disks need replacement")
       return
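
This is the other half of the delay_iallocator mechanism: CheckPrereq runs the lock-independent checks and, unless delayed, the placement check too; Exec re-dispatches the deferred part first. A self-contained sketch of the two-phase pattern (names illustrative):

    class TwoPhasePrereq(object):
      def __init__(self, delay_second_phase):
        self.delay_second_phase = delay_second_phase

      def check_prereq(self):
        self._check_phase1()        # cheap, model-independent checks
        if not self.delay_second_phase:
          self._check_phase2()

      def execute(self):
        if self.delay_second_phase:
          self._check_phase2()      # model now reflects earlier tasklets
        self._do_work()

      def _check_phase1(self):
        pass

      def _check_phase2(self):
        pass

      def _do_work(self):
        pass
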
@@ -6829,6 +6851,10 @@ class TLReplaceDisks(Tasklet):
           self.lu.LogWarning("Can't remove old LV: %s" % msg,
                              hint="remove unused LVs manually")
 
+  def _ReleaseNodeLock(self, node_name):
+    """Releases the lock for a given node."""
+    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
+
   def _ExecDrbd8DiskOnly(self, feedback_fn):
     """Replace a disk on the primary or secondary for DRBD 8.
 
@@ -6939,18 +6965,30 @@ class TLReplaceDisks(Tasklet):
 
       self.cfg.Update(self.instance, feedback_fn)
 
+    cstep = 5
+    if self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      cstep += 1
+      self._RemoveOldStorage(self.target_node, iv_names)
+      # WARNING: we release both node locks here, do not do other RPCs
+      # than WaitForSync to the primary node
+      self._ReleaseNodeLock([self.target_node, self.other_node])
+
     # Wait for sync
     # This can fail as the old devices are degraded and _WaitForSync
     # does a combined result over all disks, so we don't check its return value
-    self.lu.LogStep(5, steps_total, "Sync devices")
+    self.lu.LogStep(cstep, steps_total, "Sync devices")
+    cstep += 1
     _WaitForSync(self.lu, self.instance)
 
     # Check all devices manually
     self._CheckDevices(self.instance.primary_node, iv_names)
 
     # Step: remove old storage
-    self.lu.LogStep(6, steps_total, "Removing old storage")
-    self._RemoveOldStorage(self.target_node, iv_names)
+    if not self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      cstep += 1
+      self._RemoveOldStorage(self.target_node, iv_names)
 
   def _ExecDrbd8Secondary(self, feedback_fn):
     """Replace the secondary node for DRBD 8.
@@ -7084,19 +7122,31 @@ class TLReplaceDisks(Tasklet):
                            to_node, msg,
                            hint=("please do a gnt-instance info to see the"
                                  " status of disks"))
+    cstep = 5
+    if self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      cstep += 1
+      self._RemoveOldStorage(self.target_node, iv_names)
+      # WARNING: we release all node locks here, do not do other RPCs
+      # than WaitForSync to the primary node
+      self._ReleaseNodeLock([self.instance.primary_node,
+                             self.target_node,
+                             self.new_node])
 
     # Wait for sync
     # This can fail as the old devices are degraded and _WaitForSync
     # does a combined result over all disks, so we don't check its return value
-    self.lu.LogStep(5, steps_total, "Sync devices")
+    self.lu.LogStep(cstep, steps_total, "Sync devices")
+    cstep += 1
     _WaitForSync(self.lu, self.instance)
 
     # Check all devices manually
     self._CheckDevices(self.instance.primary_node, iv_names)
 
     # Step: remove old storage
-    self.lu.LogStep(6, steps_total, "Removing old storage")
-    self._RemoveOldStorage(self.target_node, iv_names)
+    if not self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      self._RemoveOldStorage(self.target_node, iv_names)
 
 
 class LURepairNodeStorage(NoHooksLU):
@@ -7107,12 +7157,7 @@ class LURepairNodeStorage(NoHooksLU):
   REQ_BGL = False
 
   def CheckArguments(self):
-    node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if node_name is None:
-      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
-                                 errors.ECODE_NOENT)
-
-    self.op.node_name = node_name
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
 
   def ExpandNames(self):
     self.needed_locks = {
@@ -7196,10 +7241,7 @@ class LUGrowDisk(LogicalUnit):
       "AMOUNT": self.op.amount,
       }
     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
-    nl = [
-      self.cfg.GetMasterNode(),
-      self.instance.primary_node,
-      ]
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -7284,10 +7326,7 @@ class LUQueryInstanceData(NoHooksLU):
     if self.op.instances:
       self.wanted_names = []
       for name in self.op.instances:
-        full_name = self.cfg.ExpandInstanceName(name)
-        if full_name is None:
-          raise errors.OpPrereqError("Instance '%s' not known" % name,
-                                     errors.ECODE_NOENT)
+        full_name = _ExpandInstanceName(self.cfg, name)
         self.wanted_names.append(full_name)
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
     else:
@@ -7478,7 +7517,7 @@ class LUSetInstanceParams(LogicalUnit):
                                      errors.ECODE_INVAL)
         try:
           size = int(size)
-        except ValueError, err:
+        except (TypeError, ValueError), err:
           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                      str(err), errors.ECODE_INVAL)
         disk_dict['size'] = size
@@ -8059,13 +8098,10 @@ class LUExportInstance(LogicalUnit):
           "Cannot retrieve locked instance %s" % self.op.instance_name
     _CheckNodeOnline(self, self.instance.primary_node)
 
-    self.dst_node = self.cfg.GetNodeInfo(
-      self.cfg.ExpandNodeName(self.op.target_node))
+    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
+    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
+    assert self.dst_node is not None
 
-    if self.dst_node is None:
-      # This is wrong node name, not a non-locked node
-      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node,
-                                 errors.ECODE_NOENT)
     _CheckNodeOnline(self, self.dst_node.name)
     _CheckNodeNotDrained(self, self.dst_node.name)
 
@@ -8146,8 +8182,10 @@ class LUExportInstance(LogicalUnit):
         feedback_fn("Exporting snapshot %s from %s to %s" %
                     (idx, src_node, dst_node.name))
         if dev:
+          # FIXME: pass debug from opcode to backend
           result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
-                                                 instance, cluster_name, idx)
+                                                 instance, cluster_name,
+                                                 idx, self.op.debug_level)
           msg = result.fail_msg
           if msg:
             self.LogWarning("Could not export disk/%s from node %s to"
@@ -8261,19 +8299,11 @@ class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
   def ExpandNames(self):
     self.needed_locks = {}
     if self.op.kind == constants.TAG_NODE:
-      name = self.cfg.ExpandNodeName(self.op.name)
-      if name is None:
-        raise errors.OpPrereqError("Invalid node name (%s)" %
-                                   (self.op.name,), errors.ECODE_NOENT)
-      self.op.name = name
-      self.needed_locks[locking.LEVEL_NODE] = name
+      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
+      self.needed_locks[locking.LEVEL_NODE] = self.op.name
     elif self.op.kind == constants.TAG_INSTANCE:
-      name = self.cfg.ExpandInstanceName(self.op.name)
-      if name is None:
-        raise errors.OpPrereqError("Invalid instance name (%s)" %
-                                   (self.op.name,), errors.ECODE_NOENT)
-      self.op.name = name
-      self.needed_locks[locking.LEVEL_INSTANCE] = name
+      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
+      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -8807,10 +8837,7 @@ class LUTestAllocator(NoHooksLU):
       if not hasattr(self.op, "name"):
         raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                    errors.ECODE_INVAL)
-      fname = self.cfg.ExpandInstanceName(self.op.name)
-      if fname is None:
-        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
-                                   self.op.name, errors.ECODE_NOENT)
+      fname = _ExpandInstanceName(self.cfg, self.op.name)
       self.op.name = fname
       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
     else: