bootstrap: Wait for node daemon when adding new node
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 93abd9c..afffbfb 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -95,6 +95,10 @@ class LogicalUnit(object):
     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
     # support for dry-run
     self.dry_run_result = None
+    # support for generic debug attribute
+    if (not hasattr(self.op, "debug_level") or
+        not isinstance(self.op.debug_level, int)):
+      self.op.debug_level = 0
 
     # Tasklets
     self.tasklets = None
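
The hunk above guards against opcodes that predate the new attribute: debug_level is forced to an integer default when it is missing or has the wrong type. A minimal standalone sketch of that defaulting pattern (class and function names here are illustrative, not Ganeti's):

    class Op(object):
      """Stand-in for an opcode object (hypothetical)."""

    def _EnsureIntAttr(op, name, default=0):
      # Accept the attribute only if it exists and has the right type;
      # otherwise fall back to a safe default, as in the hunk above.
      if not isinstance(getattr(op, name, None), int):
        setattr(op, name, default)

    op = Op()
    _EnsureIntAttr(op, "debug_level")
    assert op.debug_level == 0
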
@@ -300,12 +304,9 @@ class LogicalUnit(object):
     else:
       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
         "_ExpandAndLockInstance called with instance-level locks set"
-    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
-    if expanded_name is None:
-      raise errors.OpPrereqError("Instance '%s' not known" %
-                                 self.op.instance_name, errors.ECODE_NOENT)
-    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
-    self.op.instance_name = expanded_name
+    self.op.instance_name = _ExpandInstanceName(self.cfg,
+                                                self.op.instance_name)
+    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 
   def _LockInstancesNodes(self, primary_only=False):
     """Helper function to declare instances' nodes for locking.
@@ -427,7 +428,7 @@ def _GetWantedNodes(lu, nodes):
   @param nodes: list of node names or None for all nodes
   @rtype: list
   @return: the list of nodes, sorted
-  @raise errors.OpProgrammerError: if the nodes parameter is wrong type
+  @raise errors.ProgrammerError: if the nodes parameter is wrong type
 
   """
   if not isinstance(nodes, list):
@@ -438,14 +439,7 @@ def _GetWantedNodes(lu, nodes):
     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
       " non-empty list of nodes whose name is to be expanded.")
 
-  wanted = []
-  for name in nodes:
-    node = lu.cfg.ExpandNodeName(name)
-    if node is None:
-      raise errors.OpPrereqError("No such node name '%s'" % name,
-                                 errors.ECODE_NOENT)
-    wanted.append(node)
-
+  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
   return utils.NiceSort(wanted)
 
 
@@ -467,15 +461,7 @@ def _GetWantedInstances(lu, instances):
                                errors.ECODE_INVAL)
 
   if instances:
-    wanted = []
-
-    for name in instances:
-      instance = lu.cfg.ExpandInstanceName(name)
-      if instance is None:
-        raise errors.OpPrereqError("No such instance name '%s'" % name,
-                                   errors.ECODE_NOENT)
-      wanted.append(instance)
-
+    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
   else:
     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
   return wanted
@@ -555,6 +541,33 @@ def _CheckNodeNotDrained(lu, node):
                                errors.ECODE_INVAL)
 
 
+def _ExpandItemName(fn, name, kind):
+  """Expand an item name.
+
+  @param fn: the function to use for expansion
+  @param name: requested item name
+  @param kind: text description ('Node' or 'Instance')
+  @return: the resolved (full) name
+  @raise errors.OpPrereqError: if the item is not found
+
+  """
+  full_name = fn(name)
+  if full_name is None:
+    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
+                               errors.ECODE_NOENT)
+  return full_name
+
+
+def _ExpandNodeName(cfg, name):
+  """Wrapper over L{_ExpandItemName} for nodes."""
+  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
+
+
+def _ExpandInstanceName(cfg, name):
+  """Wrapper over L{_ExpandItemName} for instance."""
+  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
+
+
 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                           memory, vcpus, nics, disk_template, disks,
                           bep, hvp, hypervisor_name):
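
These helpers centralize a check that was previously copy-pasted at every call site: the config's expand functions return None for unknown names, and the wrapper turns that into a typed OpPrereqError. A self-contained sketch of the same pattern (NotFoundError and the dict-backed resolver are stand-ins, not Ganeti code):

    class NotFoundError(Exception):
      pass

    def expand_item_name(fn, name, kind):
      # fn returns the canonical (full) name, or None if unknown
      full_name = fn(name)
      if full_name is None:
        raise NotFoundError("%s '%s' not known" % (kind, name))
      return full_name

    nodes = {"node1": "node1.example.com"}
    expand_item_name(nodes.get, "node1", "Node")  # -> "node1.example.com"
    expand_item_name(nodes.get, "ghost", "Node")  # raises NotFoundError

Call sites then shrink to one-liners, as in the _GetWantedNodes hunk above, where the old loop becomes a list comprehension over _ExpandNodeName.
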
@@ -1603,8 +1616,8 @@ class LUVerifyCluster(LogicalUnit):
         test = msg and not res.offline
         self._ErrorIf(test, self.ENODEHOOKS, node_name,
                       "Communication failure in hooks execution: %s", msg)
-        if res.offline:
-          # No need to investigate payload if node is offline.
+        if res.offline or msg:
+          # No need to investigate payload if node is offline or gave an error.
           # override manually lu_result here as _ErrorIf only
           # overrides self.bad
           lu_result = 1
@@ -1719,10 +1732,7 @@ class LURepairDiskSizes(NoHooksLU):
     if self.op.instances:
       self.wanted_names = []
       for name in self.op.instances:
-        full_name = self.cfg.ExpandInstanceName(name)
-        if full_name is None:
-          raise errors.OpPrereqError("Instance '%s' not known" % name,
-                                     errors.ECODE_NOENT)
+        full_name = _ExpandInstanceName(self.cfg, name)
         self.wanted_names.append(full_name)
       self.needed_locks = {
         locking.LEVEL_NODE: [],
@@ -2425,8 +2435,11 @@ class LURemoveNode(LogicalUnit):
       "NODE_NAME": self.op.node_name,
       }
     all_nodes = self.cfg.GetNodeList()
-    if self.op.node_name in all_nodes:
+    try:
       all_nodes.remove(self.op.node_name)
+    except ValueError:
+      logging.warning("Node %s which is about to be removed not found"
+                      " in the all nodes list", self.op.node_name)
     return env, all_nodes, all_nodes
 
   def CheckPrereq(self):
@@ -2440,10 +2453,9 @@ class LURemoveNode(LogicalUnit):
     Any errors are signaled by raising errors.OpPrereqError.
 
     """
-    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
-    if node is None:
-      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name,
-                                 errors.ECODE_NOENT)
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+    node = self.cfg.GetNodeInfo(self.op.node_name)
+    assert node is not None
 
     instance_list = self.cfg.GetInstanceList()
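
The removal above switches from a membership test to the EAFP idiom: attempt list.remove and handle the ValueError, logging a warning instead of silently skipping. A minimal sketch:

    import logging

    def remove_known_node(all_nodes, node_name):
      # EAFP: one traversal instead of an "in" check followed by remove()
      try:
        all_nodes.remove(node_name)
      except ValueError:
        logging.warning("Node %s not found in the list of all nodes",
                        node_name)
      return all_nodes
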
 
@@ -2843,12 +2855,7 @@ class LUModifyNodeStorage(NoHooksLU):
   REQ_BGL = False
 
   def CheckArguments(self):
-    node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if node_name is None:
-      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
-                                 errors.ECODE_NOENT)
-
-    self.op.node_name = node_name
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
 
     storage_type = self.op.storage_type
     if storage_type not in constants.VALID_STORAGE_TYPES:
@@ -3122,11 +3129,7 @@ class LUSetNodeParams(LogicalUnit):
   REQ_BGL = False
 
   def CheckArguments(self):
-    node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if node_name is None:
-      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
-                                 errors.ECODE_INVAL)
-    self.op.node_name = node_name
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
     _CheckBooleanOpField(self.op, 'master_candidate')
     _CheckBooleanOpField(self.op, 'offline')
     _CheckBooleanOpField(self.op, 'drained')
@@ -3275,12 +3278,8 @@ class LUPowercycleNode(NoHooksLU):
   REQ_BGL = False
 
   def CheckArguments(self):
-    node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if node_name is None:
-      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
-                                 errors.ECODE_NOENT)
-    self.op.node_name = node_name
-    if node_name == self.cfg.GetMasterNode() and not self.op.force:
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
       raise errors.OpPrereqError("The node is the master and the force"
                                  " parameter was not set",
                                  errors.ECODE_INVAL)
@@ -3962,14 +3961,10 @@ class LUReinstallInstance(LogicalUnit):
     self.op.force_variant = getattr(self.op, "force_variant", False)
     if self.op.os_type is not None:
       # OS verification
-      pnode = self.cfg.GetNodeInfo(
-        self.cfg.ExpandNodeName(instance.primary_node))
-      if pnode is None:
-        raise errors.OpPrereqError("Primary node '%s' is unknown" %
-                                   self.op.pnode, errors.ECODE_NOENT)
-      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
+      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
+      result = self.rpc.call_os_get(pnode, self.op.os_type)
       result.Raise("OS '%s' not in supported OS list for primary node %s" %
-                   (self.op.os_type, pnode.name),
+                   (self.op.os_type, pnode),
                    prereq=True, ecode=errors.ECODE_INVAL)
       if not self.op.force_variant:
         _CheckOSVariant(result.payload, self.op.os_type)
@@ -3990,7 +3985,9 @@ class LUReinstallInstance(LogicalUnit):
     _StartInstanceDisks(self, inst, None)
     try:
       feedback_fn("Running the instance OS create scripts...")
-      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
+      # FIXME: pass debug option from opcode to backend
+      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
+                                             self.op.debug_level)
       result.Raise("Could not install OS for instance %s on node %s" %
                    (inst.name, inst.primary_node))
     finally:
@@ -4106,11 +4103,10 @@ class LURenameInstance(LogicalUnit):
     This checks that the instance is in the cluster and is not running.
 
     """
-    instance = self.cfg.GetInstanceInfo(
-      self.cfg.ExpandInstanceName(self.op.instance_name))
-    if instance is None:
-      raise errors.OpPrereqError("Instance '%s' not known" %
-                                 self.op.instance_name, errors.ECODE_NOENT)
+    self.op.instance_name = _ExpandInstanceName(self.cfg,
+                                                self.op.instance_name)
+    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+    assert instance is not None
     _CheckNodeOnline(self, instance.primary_node)
 
     if instance.admin_up:
@@ -4174,7 +4170,7 @@ class LURenameInstance(LogicalUnit):
     _StartInstanceDisks(self, inst, None)
     try:
       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
-                                                 old_name)
+                                                 old_name, self.op.debug_level)
       msg = result.fail_msg
       if msg:
         msg = ("Could not run OS rename script for instance %s on node %s"
@@ -4219,7 +4215,8 @@ class LURemoveInstance(LogicalUnit):
     env = _BuildInstanceHookEnvByObject(self, self.instance)
     env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
     nl = [self.cfg.GetMasterNode()]
-    return env, nl, nl
+    nl_post = list(self.instance.all_nodes) + nl
+    return env, nl, nl_post
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -4578,13 +4575,22 @@ class LUFailoverInstance(LogicalUnit):
     This runs on master, primary and secondary nodes of the instance.
 
     """
+    instance = self.instance
+    source_node = instance.primary_node
+    target_node = instance.secondary_nodes[0]
     env = {
       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
       "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
+      "OLD_PRIMARY": source_node,
+      "OLD_SECONDARY": target_node,
+      "NEW_PRIMARY": target_node,
+      "NEW_SECONDARY": source_node,
       }
-    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
-    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
-    return env, nl, nl
+    env.update(_BuildInstanceHookEnvByObject(self, instance))
+    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
+    nl_post = list(nl)
+    nl_post.append(source_node)
+    return env, nl, nl_post
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -4726,11 +4732,21 @@ class LUMigrateInstance(LogicalUnit):
 
     """
     instance = self._migrater.instance
+    source_node = instance.primary_node
+    target_node = instance.secondary_nodes[0]
     env = _BuildInstanceHookEnvByObject(self, instance)
     env["MIGRATE_LIVE"] = self.op.live
     env["MIGRATE_CLEANUP"] = self.op.cleanup
+    env.update({
+        "OLD_PRIMARY": source_node,
+        "OLD_SECONDARY": target_node,
+        "NEW_PRIMARY": target_node,
+        "NEW_SECONDARY": source_node,
+        })
     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
-    return env, nl, nl
+    nl_post = list(nl)
+    nl_post.append(source_node)
+    return env, nl, nl_post
 
 
 class LUMoveInstance(LogicalUnit):
@@ -4751,10 +4767,7 @@ class LUMoveInstance(LogicalUnit):
 
   def ExpandNames(self):
     self._ExpandAndLockInstance()
-    target_node = self.cfg.ExpandNodeName(self.op.target_node)
-    if target_node is None:
-      raise errors.OpPrereqError("Node '%s' not known" %
-                                  self.op.target_node, errors.ECODE_NOENT)
+    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
     self.op.target_node = target_node
     self.needed_locks[locking.LEVEL_NODE] = [target_node]
     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
@@ -4928,10 +4941,7 @@ class LUMigrateNode(LogicalUnit):
   REQ_BGL = False
 
   def ExpandNames(self):
-    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if self.op.node_name is None:
-      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
-                                 errors.ECODE_NOENT)
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
 
     self.needed_locks = {
       locking.LEVEL_NODE: [self.op.node_name],
@@ -4991,11 +5001,9 @@ class TLMigrateInstance(Tasklet):
     This checks that the instance is in the cluster.
 
     """
-    instance = self.cfg.GetInstanceInfo(
-      self.cfg.ExpandInstanceName(self.instance_name))
-    if instance is None:
-      raise errors.OpPrereqError("Instance '%s' not known" %
-                                 self.instance_name, errors.ECODE_NOENT)
+    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
+    instance = self.cfg.GetInstanceInfo(instance_name)
+    assert instance is not None
 
     if instance.disk_template != constants.DT_DRBD8:
       raise errors.OpPrereqError("Instance's disk layout is not"
@@ -5651,15 +5659,6 @@ class LUCreateInstance(LogicalUnit):
       raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                  errors.ECODE_INVAL)
 
-  def _ExpandNode(self, node):
-    """Expands and checks one node name.
-
-    """
-    node_full = self.cfg.ExpandNodeName(node)
-    if node_full is None:
-      raise errors.OpPrereqError("Unknown node %s" % node, errors.ECODE_NOENT)
-    return node_full
-
   def ExpandNames(self):
     """ExpandNames for CreateInstance.
 
@@ -5815,7 +5814,7 @@ class LUCreateInstance(LogicalUnit):
         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
       try:
         size = int(size)
-      except ValueError:
+      except (TypeError, ValueError):
         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                    errors.ECODE_INVAL)
       self.disks.append({"size": size, "mode": mode})
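
Catching TypeError as well matters because int() raises ValueError for malformed strings but TypeError for non-string, non-number inputs such as None (e.g. a missing "size" value):

    for bad in ("10g", None):
      try:
        int(bad)
      except (TypeError, ValueError) as err:
        print("rejected %r: %s" % (bad, err))

The same fix is applied to the disk-size parsing in LUSetInstanceParams further down.
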
@@ -5839,10 +5838,10 @@ class LUCreateInstance(LogicalUnit):
     if self.op.iallocator:
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
     else:
-      self.op.pnode = self._ExpandNode(self.op.pnode)
+      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
       nodelist = [self.op.pnode]
       if self.op.snode is not None:
-        self.op.snode = self._ExpandNode(self.op.snode)
+        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
         nodelist.append(self.op.snode)
       self.needed_locks[locking.LEVEL_NODE] = nodelist
 
@@ -5862,7 +5861,7 @@ class LUCreateInstance(LogicalUnit):
                                      " path requires a source node option.",
                                      errors.ECODE_INVAL)
       else:
-        self.op.src_node = src_node = self._ExpandNode(src_node)
+        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
           self.needed_locks[locking.LEVEL_NODE].append(src_node)
         if not os.path.isabs(src_path):
@@ -6227,7 +6226,9 @@ class LUCreateInstance(LogicalUnit):
     if iobj.disk_template != constants.DT_DISKLESS:
       if self.op.mode == constants.INSTANCE_CREATE:
         feedback_fn("* running the instance OS create scripts...")
-        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
+        # FIXME: pass debug option from opcode to backend
+        result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
+                                               self.op.debug_level)
         result.Raise("Could not add os for instance %s"
                      " on node %s" % (instance, pnode_name))
 
@@ -6236,9 +6237,11 @@ class LUCreateInstance(LogicalUnit):
         src_node = self.op.src_node
         src_images = self.src_images
         cluster_name = self.cfg.GetClusterName()
+        # FIXME: pass debug option from opcode to backend
         import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                          src_node, src_images,
-                                                         cluster_name)
+                                                         cluster_name,
+                                                         self.op.debug_level)
         msg = import_result.fail_msg
         if msg:
           self.LogWarning("Error while importing the disk images for instance"
@@ -6326,6 +6329,8 @@ class LUReplaceDisks(LogicalUnit):
       self.op.remote_node = None
     if not hasattr(self.op, "iallocator"):
       self.op.iallocator = None
+    if not hasattr(self.op, "early_release"):
+      self.op.early_release = False
 
     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                   self.op.iallocator)
@@ -6337,11 +6342,7 @@ class LUReplaceDisks(LogicalUnit):
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
 
     elif self.op.remote_node is not None:
-      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
-      if remote_node is None:
-        raise errors.OpPrereqError("Node '%s' not known" %
-                                   self.op.remote_node, errors.ECODE_NOENT)
-
+      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
       self.op.remote_node = remote_node
 
       # Warning: do not remove the locking of the new secondary here
@@ -6357,7 +6358,7 @@ class LUReplaceDisks(LogicalUnit):
 
     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                    self.op.iallocator, self.op.remote_node,
-                                   self.op.disks)
+                                   self.op.disks, False, self.op.early_release)
 
     self.tasklets = [self.replacer]
 
@@ -6404,16 +6405,15 @@ class LUEvacuateNode(LogicalUnit):
       self.op.remote_node = None
     if not hasattr(self.op, "iallocator"):
       self.op.iallocator = None
+    if not hasattr(self.op, "early_release"):
+      self.op.early_release = False
 
     TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                   self.op.remote_node,
                                   self.op.iallocator)
 
   def ExpandNames(self):
-    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if self.op.node_name is None:
-      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
-                                 errors.ECODE_NOENT)
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
 
     self.needed_locks = {}
 
@@ -6422,18 +6422,13 @@ class LUEvacuateNode(LogicalUnit):
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
 
     elif self.op.remote_node is not None:
-      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
-      if remote_node is None:
-        raise errors.OpPrereqError("Node '%s' not known" %
-                                   self.op.remote_node, errors.ECODE_NOENT)
-
-      self.op.remote_node = remote_node
+      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
 
       # Warning: do not remove the locking of the new secondary here
       # unless DRBD8.AddChildren is changed to work in parallel;
       # currently it doesn't since parallel invocations of
       # FindUnusedMinor will conflict
-      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
+      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
 
     else:
@@ -6449,7 +6444,8 @@ class LUEvacuateNode(LogicalUnit):
       names.append(inst.name)
 
       replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
-                                self.op.iallocator, self.op.remote_node, [])
+                                self.op.iallocator, self.op.remote_node, [],
+                                True, self.op.early_release)
       tasklets.append(replacer)
 
     self.tasklets = tasklets
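
Note the contrast with LUReplaceDisks above: a plain disk replacement passes delay_iallocator=False, while node evacuation builds one tasklet per instance and passes True, so each tasklet's placement check runs only after the previous tasklets have updated the cluster model. Schematically (stand-in class, not the real tasklet):

    class Replacer(object):
      def __init__(self, instance, delay_iallocator, early_release):
        self.instance = instance
        self.delay_iallocator = delay_iallocator
        self.early_release = early_release

    # evacuation: one replacer per instance, placement deferred to Exec
    tasklets = [Replacer(name, True, False) for name in ("inst1", "inst2")]
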
@@ -6491,7 +6487,7 @@ class TLReplaceDisks(Tasklet):
 
   """
   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
-               disks):
+               disks, delay_iallocator, early_release):
     """Initializes this class.
 
     """
@@ -6503,6 +6499,8 @@ class TLReplaceDisks(Tasklet):
     self.iallocator_name = iallocator_name
     self.remote_node = remote_node
     self.disks = disks
+    self.delay_iallocator = delay_iallocator
+    self.early_release = early_release
 
     # Runtime data
     self.instance = None
@@ -6589,6 +6587,19 @@ class TLReplaceDisks(Tasklet):
                                  len(instance.secondary_nodes),
                                  errors.ECODE_FAULT)
 
+    if not self.delay_iallocator:
+      self._CheckPrereq2()
+
+  def _CheckPrereq2(self):
+    """Check prerequisites, second part.
+
+    This function should always be part of CheckPrereq. It was separated
+    out and is now called from Exec because, during node evacuation, the
+    iallocator would otherwise be invoked with an unmodified cluster model
+    that does not reflect the changes planned by earlier tasklets.
+
+    """
+    instance = self.instance
     secondary_node = instance.secondary_nodes[0]
 
     if self.iallocator_name is None:
@@ -6662,6 +6673,14 @@ class TLReplaceDisks(Tasklet):
 
         _CheckNodeNotDrained(self.lu, remote_node)
 
+        old_node_info = self.cfg.GetNodeInfo(secondary_node)
+        assert old_node_info is not None
+        if old_node_info.offline and not self.early_release:
+          # doesn't make sense to delay the release
+          self.early_release = True
+          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
+                          " early-release mode", secondary_node)
+
       else:
         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                      self.mode)
@@ -6692,6 +6711,9 @@ class TLReplaceDisks(Tasklet):
     This dispatches the disk replacement to the appropriate handler.
 
     """
+    if self.delay_iallocator:
+      self._CheckPrereq2()
+
     if not self.disks:
       feedback_fn("No disks need replacement")
       return
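
This is the other half of the delay_iallocator mechanism: CheckPrereq runs the lock-independent checks and, unless delayed, the placement check too; Exec re-dispatches the deferred part first. A self-contained sketch of the two-phase pattern (names illustrative):

    class TwoPhasePrereq(object):
      def __init__(self, delay_second_phase):
        self.delay_second_phase = delay_second_phase

      def check_prereq(self):
        self._check_phase1()        # cheap, model-independent checks
        if not self.delay_second_phase:
          self._check_phase2()

      def execute(self):
        if self.delay_second_phase:
          self._check_phase2()      # model now reflects earlier tasklets
        self._do_work()

      def _check_phase1(self):
        pass

      def _check_phase2(self):
        pass

      def _do_work(self):
        pass
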
@@ -6829,6 +6851,10 @@ class TLReplaceDisks(Tasklet):
           self.lu.LogWarning("Can't remove old LV: %s" % msg,
                              hint="remove unused LVs manually")
 
+  def _ReleaseNodeLock(self, node_name):
+    """Releases the lock for a given node."""
+    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
+
   def _ExecDrbd8DiskOnly(self, feedback_fn):
     """Replace a disk on the primary or secondary for DRBD 8.
 
@@ -6939,18 +6965,30 @@ class TLReplaceDisks(Tasklet):
 
       self.cfg.Update(self.instance, feedback_fn)
 
+    cstep = 5
+    if self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      cstep += 1
+      self._RemoveOldStorage(self.target_node, iv_names)
+      # WARNING: we release both node locks here, do not do other RPCs
+      # than WaitForSync to the primary node
+      self._ReleaseNodeLock([self.target_node, self.other_node])
+
     # Wait for sync
     # This can fail as the old devices are degraded and _WaitForSync
     # does a combined result over all disks, so we don't check its return value
-    self.lu.LogStep(5, steps_total, "Sync devices")
+    self.lu.LogStep(cstep, steps_total, "Sync devices")
+    cstep += 1
     _WaitForSync(self.lu, self.instance)
 
     # Check all devices manually
     self._CheckDevices(self.instance.primary_node, iv_names)
 
     # Step: remove old storage
-    self.lu.LogStep(6, steps_total, "Removing old storage")
-    self._RemoveOldStorage(self.target_node, iv_names)
+    if not self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      cstep += 1
+      self._RemoveOldStorage(self.target_node, iv_names)
 
   def _ExecDrbd8Secondary(self, feedback_fn):
     """Replace the secondary node for DRBD 8.
@@ -7084,19 +7122,31 @@ class TLReplaceDisks(Tasklet):
                            to_node, msg,
                            hint=("please do a gnt-instance info to see the"
                                  " status of disks"))
+    cstep = 5
+    if self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      cstep += 1
+      self._RemoveOldStorage(self.target_node, iv_names)
+      # WARNING: we release all node locks here, do not do other RPCs
+      # than WaitForSync to the primary node
+      self._ReleaseNodeLock([self.instance.primary_node,
+                             self.target_node,
+                             self.new_node])
 
     # Wait for sync
     # This can fail as the old devices are degraded and _WaitForSync
     # does a combined result over all disks, so we don't check its return value
-    self.lu.LogStep(5, steps_total, "Sync devices")
+    self.lu.LogStep(cstep, steps_total, "Sync devices")
+    cstep += 1
     _WaitForSync(self.lu, self.instance)
 
     # Check all devices manually
     self._CheckDevices(self.instance.primary_node, iv_names)
 
     # Step: remove old storage
-    self.lu.LogStep(6, steps_total, "Removing old storage")
-    self._RemoveOldStorage(self.target_node, iv_names)
+    if not self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      self._RemoveOldStorage(self.target_node, iv_names)
 
 
 class LURepairNodeStorage(NoHooksLU):
@@ -7107,12 +7157,7 @@ class LURepairNodeStorage(NoHooksLU):
   REQ_BGL = False
 
   def CheckArguments(self):
-    node_name = self.cfg.ExpandNodeName(self.op.node_name)
-    if node_name is None:
-      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
-                                 errors.ECODE_NOENT)
-
-    self.op.node_name = node_name
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
 
   def ExpandNames(self):
     self.needed_locks = {
@@ -7196,10 +7241,7 @@ class LUGrowDisk(LogicalUnit):
       "AMOUNT": self.op.amount,
       }
     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
-    nl = [
-      self.cfg.GetMasterNode(),
-      self.instance.primary_node,
-      ]
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -7284,10 +7326,7 @@ class LUQueryInstanceData(NoHooksLU):
     if self.op.instances:
       self.wanted_names = []
       for name in self.op.instances:
-        full_name = self.cfg.ExpandInstanceName(name)
-        if full_name is None:
-          raise errors.OpPrereqError("Instance '%s' not known" % name,
-                                     errors.ECODE_NOENT)
+        full_name = _ExpandInstanceName(self.cfg, name)
         self.wanted_names.append(full_name)
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
     else:
@@ -7478,7 +7517,7 @@ class LUSetInstanceParams(LogicalUnit):
                                      errors.ECODE_INVAL)
         try:
           size = int(size)
-        except ValueError, err:
+        except (TypeError, ValueError), err:
           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                      str(err), errors.ECODE_INVAL)
         disk_dict['size'] = size
@@ -8059,13 +8098,10 @@ class LUExportInstance(LogicalUnit):
           "Cannot retrieve locked instance %s" % self.op.instance_name
     _CheckNodeOnline(self, self.instance.primary_node)
 
-    self.dst_node = self.cfg.GetNodeInfo(
-      self.cfg.ExpandNodeName(self.op.target_node))
+    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
+    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
+    assert self.dst_node is not None
 
-    if self.dst_node is None:
-      # This is wrong node name, not a non-locked node
-      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node,
-                                 errors.ECODE_NOENT)
     _CheckNodeOnline(self, self.dst_node.name)
     _CheckNodeNotDrained(self, self.dst_node.name)
 
@@ -8146,8 +8182,10 @@ class LUExportInstance(LogicalUnit):
         feedback_fn("Exporting snapshot %s from %s to %s" %
                     (idx, src_node, dst_node.name))
         if dev:
+          # FIXME: pass debug from opcode to backend
           result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
-                                                 instance, cluster_name, idx)
+                                                 instance, cluster_name,
+                                                 idx, self.op.debug_level)
           msg = result.fail_msg
           if msg:
             self.LogWarning("Could not export disk/%s from node %s to"
@@ -8261,19 +8299,11 @@ class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
   def ExpandNames(self):
     self.needed_locks = {}
     if self.op.kind == constants.TAG_NODE:
-      name = self.cfg.ExpandNodeName(self.op.name)
-      if name is None:
-        raise errors.OpPrereqError("Invalid node name (%s)" %
-                                   (self.op.name,), errors.ECODE_NOENT)
-      self.op.name = name
-      self.needed_locks[locking.LEVEL_NODE] = name
+      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
+      self.needed_locks[locking.LEVEL_NODE] = self.op.name
     elif self.op.kind == constants.TAG_INSTANCE:
-      name = self.cfg.ExpandInstanceName(self.op.name)
-      if name is None:
-        raise errors.OpPrereqError("Invalid instance name (%s)" %
-                                   (self.op.name,), errors.ECODE_NOENT)
-      self.op.name = name
-      self.needed_locks[locking.LEVEL_INSTANCE] = name
+      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
+      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -8807,10 +8837,7 @@ class LUTestAllocator(NoHooksLU):
       if not hasattr(self.op, "name"):
         raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                    errors.ECODE_INVAL)
-      fname = self.cfg.ExpandInstanceName(self.op.name)
-      if fname is None:
-        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
-                                   self.op.name, errors.ECODE_NOENT)
+      fname = _ExpandInstanceName(self.cfg, self.op.name)
       self.op.name = fname
       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
     else: