Transition into and out of offline instance state

[ganeti-local] / lib / cmdlib.py
diff --git a/lib/cmdlib.py b/lib/cmdlib.py

index 6a2c03e..57883c1 100644 (file)
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -59,10 +59,22 @@ from ganeti import query
  from ganeti import qlang
  from ganeti import opcodes
  from ganeti import ht
+from ganeti import rpc
  
  import ganeti.masterd.instance # pylint: disable=W0611
  
  
+#: Size of DRBD meta block device
+DRBD_META_SIZE = 128
+
+# States of instance
+INSTANCE_UP = [constants.ADMINST_UP]
+INSTANCE_DOWN = [constants.ADMINST_DOWN]
+INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
+INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
+INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
+
+
  class ResultWithJobs:
    """Data container for LU results with jobs.
  
@@ -108,7 +120,7 @@ class LogicalUnit(object):
    HTYPE = None
    REQ_BGL = True
  
-  def __init__(self, processor, op, context, rpc):
+  def __init__(self, processor, op, context, rpc_runner):
      """Constructor for LogicalUnit.
  
      This needs to be overridden in derived classes in order to check op
@@ -122,7 +134,7 @@ class LogicalUnit(object):
      # readability alias
      self.owned_locks = context.glm.list_owned
      self.context = context
-    self.rpc = rpc
+    self.rpc = rpc_runner
      # Dicts used to declare locking needs to mcpu
      self.needed_locks = None
      self.share_locks = dict.fromkeys(locking.LEVELS, 0)
@@ -344,7 +356,8 @@ class LogicalUnit(object):
                                                  self.op.instance_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
  
-  def _LockInstancesNodes(self, primary_only=False):
+  def _LockInstancesNodes(self, primary_only=False,
+                          level=locking.LEVEL_NODE):
      """Helper function to declare instances' nodes for locking.
  
      This function should be called after locking one or more instances to lock
@@ -365,9 +378,10 @@ class LogicalUnit(object):
  
      @type primary_only: boolean
      @param primary_only: only lock primary nodes of locked instances
+    @param level: Which lock level to use for locking nodes
  
      """
-    assert locking.LEVEL_NODE in self.recalculate_locks, \
+    assert level in self.recalculate_locks, \
        "_LockInstancesNodes helper function called with no nodes to recalculate"
  
      # TODO: check if we're really been called with the instance locks held
@@ -382,12 +396,14 @@ class LogicalUnit(object):
        if not primary_only:
          wanted_nodes.extend(instance.secondary_nodes)
  
-    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
-      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
-    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
-      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
+    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
+      self.needed_locks[level] = wanted_nodes
+    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
+      self.needed_locks[level].extend(wanted_nodes)
+    else:
+      raise errors.ProgrammerError("Unknown recalculation mode")
  
-    del self.recalculate_locks[locking.LEVEL_NODE]
+    del self.recalculate_locks[level]
  
  
  class NoHooksLU(LogicalUnit): # pylint: disable=W0223
@@ -468,13 +484,13 @@ class _QueryBase:
    #: Attribute holding field definitions
    FIELDS = None
  
-  def __init__(self, filter_, fields, use_locking):
+  def __init__(self, qfilter, fields, use_locking):
      """Initializes this class.
  
      """
      self.use_locking = use_locking
  
-    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
+    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                               namefield="name")
      self.requested_data = self.query.RequestedData()
      self.names = self.query.RequestedNames()
@@ -712,12 +728,17 @@ def _ReleaseLocks(lu, level, names=None, keep=None):
    else:
      should_release = None
  
-  if should_release:
+  owned = lu.owned_locks(level)
+  if not owned:
+    # Not owning any lock at this level, do nothing
+    pass
+
+  elif should_release:
      retain = []
      release = []
  
      # Determine which locks to release
-    for name in lu.owned_locks(level):
+    for name in owned:
        if should_release(name):
          release.append(name)
        else:
@@ -753,7 +774,7 @@ def _RunPostHook(lu, node_name):
    """Runs the post-hook for an opcode on a single node.
  
    """
-  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
+  hm = lu.proc.BuildHooksManager(lu)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
    except:
@@ -889,20 +910,31 @@ def _GetClusterDomainSecret():
                                 strict=True)
  
  
-def _CheckInstanceDown(lu, instance, reason):
-  """Ensure that an instance is not running."""
-  if instance.admin_up:
-    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
-                               (instance.name, reason), errors.ECODE_STATE)
+def _CheckInstanceState(lu, instance, req_states, msg=None):
+  """Ensure that an instance is in one of the required states.
  
-  pnode = instance.primary_node
-  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
-  ins_l.Raise("Can't contact node %s for instance information" % pnode,
-              prereq=True, ecode=errors.ECODE_ENVIRON)
+  @param lu: the LU on behalf of which we make the check
+  @param instance: the instance to check
+  @param msg: if passed, should be a message to replace the default one
+  @raise errors.OpPrereqError: if the instance is not in the required state
  
-  if instance.name in ins_l.payload:
-    raise errors.OpPrereqError("Instance %s is running, %s" %
-                               (instance.name, reason), errors.ECODE_STATE)
+  """
+  if msg is None:
+    msg = "can't use instance from outside %s states" % ", ".join(req_states)
+  if instance.admin_state not in req_states:
+    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
+                               (instance, instance.admin_state, msg),
+                               errors.ECODE_STATE)
+
+  if constants.ADMINST_UP not in req_states:
+    pnode = instance.primary_node
+    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
+    ins_l.Raise("Can't contact node %s for instance information" % pnode,
+                prereq=True, ecode=errors.ECODE_ENVIRON)
+
+    if instance.name in ins_l.payload:
+      raise errors.OpPrereqError("Instance %s is running, %s" %
+                                 (instance.name, msg), errors.ECODE_STATE)
  
  
  def _ExpandItemName(fn, name, kind):
@@ -947,8 +979,8 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
    @param secondary_nodes: list of secondary nodes as strings
    @type os_type: string
    @param os_type: the name of the instance's OS
-  @type status: boolean
-  @param status: the should_run status of the instance
+  @type status: string
+  @param status: the desired status of the instance
    @type memory: string
    @param memory: the memory size of the instance
    @type vcpus: string
@@ -972,17 +1004,13 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
    @return: the hook environment for this instance
  
    """
-  if status:
-    str_status = "up"
-  else:
-    str_status = "down"
    env = {
      "OP_TARGET": name,
      "INSTANCE_NAME": name,
      "INSTANCE_PRIMARY": primary_node,
      "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
      "INSTANCE_OS_TYPE": os_type,
-    "INSTANCE_STATUS": str_status,
+    "INSTANCE_STATUS": status,
      "INSTANCE_MEMORY": memory,
      "INSTANCE_VCPUS": vcpus,
      "INSTANCE_DISK_TEMPLATE": disk_template,
@@ -1074,7 +1102,7 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
      "primary_node": instance.primary_node,
      "secondary_nodes": instance.secondary_nodes,
      "os_type": instance.os,
-    "status": instance.admin_up,
+    "status": instance.admin_state,
      "memory": bep[constants.BE_MEMORY],
      "vcpus": bep[constants.BE_VCPUS],
      "nics": _NICListToTuple(lu, instance.nics),
@@ -1204,13 +1232,13 @@ def _GetStorageTypeArgs(cfg, storage_type):
    return []
  
  
-def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
+def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
    faulty = []
  
    for dev in instance.disks:
      cfg.SetDiskID(dev, node_name)
  
-  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
+  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
    result.Raise("Failed to get disk status from node %s" % node_name,
                 prereq=prereq, ecode=errors.ECODE_ENVIRON)
  
@@ -1350,15 +1378,17 @@ class LUClusterDestroy(LogicalUnit):
      """Destroys the cluster.
  
      """
-    master = self.cfg.GetMasterNode()
+    master_params = self.cfg.GetMasterNetworkParameters()
  
      # Run post hooks on master node before it's removed
-    _RunPostHook(self, master)
+    _RunPostHook(self, master_params.name)
  
-    result = self.rpc.call_node_deactivate_master_ip(master)
+    ems = self.cfg.GetUseExternalMipScript()
+    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+                                                     master_params, ems)
      result.Raise("Could not disable the master role")
  
-    return master
+    return master_params.name
  
  
  def _VerifyCertificate(filename):
@@ -1448,7 +1478,7 @@ class _VerifyErrors(object):
  
      """
      ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
-    itype, etxt = ecode
+    itype, etxt, _ = ecode
      # first complete the msg
      if args:
        msg = msg % args
@@ -1473,7 +1503,7 @@ class _VerifyErrors(object):
  
      # If the error code is in the list of ignored errors, demote the error to a
      # warning
-    (_, etxt) = ecode
+    (_, etxt, _) = ecode
      if etxt in self.op.ignore_errors:     # pylint: disable=E1101
        kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
  
@@ -1931,6 +1961,26 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        _ErrorIf(bool(missing), constants.CV_ENODENET, node,
                 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
  
+  def _VerifyNodeUserScripts(self, ninfo, nresult):
+    """Check the results of user scripts presence and executability on the node
+
+    @type ninfo: L{objects.Node}
+    @param ninfo: the node to check
+    @param nresult: the remote results for the node
+
+    """
+    node = ninfo.name
+
+    test = not constants.NV_USERSCRIPTS in nresult
+    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
+                  "did not return user scripts information")
+
+    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
+    if not test:
+      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
+                    "user scripts not present or not executable: %s" %
+                    utils.CommaJoin(sorted(broken_scripts)))
+
    def _VerifyNodeNetwork(self, ninfo, nresult):
      """Check the node network connectivity results.
  
@@ -1997,7 +2047,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                   "volume %s missing on node %s", volume, node)
  
-    if instanceconfig.admin_up:
+    if instanceconfig.admin_state == constants.ADMINST_UP:
        pri_img = node_image[node_current]
        test = instance not in pri_img.instances and not pri_img.offline
        _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
@@ -2013,12 +2063,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        # node here
        snode = node_image[nname]
        bad_snode = snode.ghost or snode.offline
-      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
+      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
+               not success and not bad_snode,
                 constants.CV_EINSTANCEFAULTYDISK, instance,
                 "couldn't retrieve status for disk/%s on %s: %s",
                 idx, nname, bdev_status)
-      _ErrorIf((instanceconfig.admin_up and success and
-                bdev_status.ldisk_status == constants.LDS_FAULTY),
+      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
+                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
                 constants.CV_EINSTANCEFAULTYDISK, instance,
                 "disk/%s on %s is faulty", idx, nname)
  
@@ -2080,7 +2131,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
    @classmethod
    def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
-                   (files_all, files_all_opt, files_mc, files_vm)):
+                   (files_all, files_opt, files_mc, files_vm)):
      """Verifies file checksums collected from all nodes.
  
      @param errorif: Callback for reporting errors
@@ -2089,14 +2140,9 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      @param all_nvinfo: RPC results
  
      """
-    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
-            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
-           "Found file listed in more than one file list"
-
      # Define functions determining which nodes to consider for a file
      files2nodefn = [
        (files_all, None),
-      (files_all_opt, None),
        (files_mc, lambda node: (node.master_candidate or
                                 node.name == master_node)),
        (files_vm, lambda node: node.vm_capable),
@@ -2113,7 +2159,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                          frozenset(map(operator.attrgetter("name"), filenodes)))
                         for filename in files)
  
-    assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
+    assert set(nodefiles) == (files_all | files_mc | files_vm)
  
      fileinfo = dict((filename, {}) for filename in nodefiles)
      ignore_nodes = set()
@@ -2155,7 +2201,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        # Nodes missing file
        missing_file = expected_nodes - with_file
  
-      if filename in files_all_opt:
+      if filename in files_opt:
          # All or no nodes
          errorif(missing_file and missing_file != expected_nodes,
                  constants.CV_ECLUSTERFILECHECK, None,
@@ -2231,7 +2277,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
-        node_drbd[minor] = (instance.name, instance.admin_up)
+        node_drbd[minor] = (instance.name,
+                            instance.admin_state == constants.ADMINST_UP)
  
      # and now check them
      used_minors = nresult.get(constants.NV_DRBDLIST, [])
@@ -2629,6 +2676,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      i_non_redundant = [] # Non redundant instances
      i_non_a_balanced = [] # Non auto-balanced instances
+    i_offline = 0 # Count of offline instances
      n_offline = 0 # Count of offline nodes
      n_drained = 0 # Count of nodes being drained
      node_vol_should = {}
@@ -2644,6 +2692,10 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
  
+    user_scripts = []
+    if self.cfg.GetUseExternalMipScript():
+      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
+
      node_verify_param = {
        constants.NV_FILELIST:
          utils.UniqueSequence(filename
@@ -2666,6 +2718,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        constants.NV_MASTERIP: (master_node, master_ip),
        constants.NV_OSLIST: None,
        constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
+      constants.NV_USERSCRIPTS: user_scripts,
        }
  
      if vg_name is not None:
@@ -2824,6 +2877,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        nimg.call_ok = self._VerifyNode(node_i, nresult)
        self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
        self._VerifyNodeNetwork(node_i, nresult)
+      self._VerifyNodeUserScripts(node_i, nresult)
        self._VerifyOob(node_i, nresult)
  
        if nimg.vm_capable:
@@ -2848,6 +2902,12 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          non_primary_inst = set(nimg.instances).difference(nimg.pinst)
  
          for inst in non_primary_inst:
+          # FIXME: investigate best way to handle offline insts
+          if inst.admin_state == constants.ADMINST_OFFLINE:
+            if verbose:
+              feedback_fn("* Skipping offline instance %s" % inst.name)
+            i_offline += 1
+            continue
            test = inst in self.all_inst_info
            _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                     "instance should not run on node %s", node_i.name)
@@ -2873,7 +2933,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                 constants.CV_ENODERPC, pnode, "instance %s, connection to"
                 " primary node failed", instance)
  
-      _ErrorIf(inst_config.admin_up and pnode_img.offline,
+      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
+               pnode_img.offline,
                 constants.CV_EINSTANCEBADNODE, instance,
                 "instance is marked as running and lives on offline node %s",
                 inst_config.primary_node)
@@ -2965,6 +3026,9 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                    % len(i_non_a_balanced))
  
+    if i_offline:
+      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
+
      if n_offline:
        feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
  
@@ -3127,8 +3191,8 @@ class LUGroupVerifyDisks(NoHooksLU):
      res_missing = {}
  
      nv_dict = _MapInstanceDisksToNodes([inst
-                                        for inst in self.instances.values()
-                                        if inst.admin_up])
+            for inst in self.instances.values()
+            if inst.admin_state == constants.ADMINST_UP])
  
      if nv_dict:
        nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
@@ -3154,7 +3218,7 @@ class LUGroupVerifyDisks(NoHooksLU):
        # any leftover items in nv_dict are missing LVs, let's arrange the data
        # better
        for key, inst in nv_dict.iteritems():
-        res_missing.setdefault(inst, []).append(key)
+        res_missing.setdefault(inst, []).append(list(key))
  
      return (res_nodes, list(res_instances), res_missing)
  
@@ -3169,21 +3233,21 @@ class LUClusterRepairDiskSizes(NoHooksLU):
      if self.op.instances:
        self.wanted_names = _GetWantedInstances(self, self.op.instances)
        self.needed_locks = {
-        locking.LEVEL_NODE: [],
+        locking.LEVEL_NODE_RES: [],
          locking.LEVEL_INSTANCE: self.wanted_names,
          }
-      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
      else:
        self.wanted_names = None
        self.needed_locks = {
-        locking.LEVEL_NODE: locking.ALL_SET,
+        locking.LEVEL_NODE_RES: locking.ALL_SET,
          locking.LEVEL_INSTANCE: locking.ALL_SET,
          }
      self.share_locks = _ShareAll()
  
    def DeclareLocks(self, level):
-    if level == locking.LEVEL_NODE and self.wanted_names is not None:
-      self._LockInstancesNodes(primary_only=True)
+    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
+      self._LockInstancesNodes(primary_only=True, level=level)
  
    def CheckPrereq(self):
      """Check prerequisites.
@@ -3234,6 +3298,11 @@ class LUClusterRepairDiskSizes(NoHooksLU):
        for idx, disk in enumerate(instance.disks):
          per_node_disks[pnode].append((instance, idx, disk))
  
+    assert not (frozenset(per_node_disks.keys()) -
+                self.owned_locks(locking.LEVEL_NODE_RES)), \
+      "Not owning correct locks"
+    assert not self.owned_locks(locking.LEVEL_NODE)
+
      changed = []
      for node, dskl in per_node_disks.items():
        newl = [v[2].Copy() for v in dskl]
@@ -3323,29 +3392,33 @@ class LUClusterRename(LogicalUnit):
  
      """
      clustername = self.op.name
-    ip = self.ip
+    new_ip = self.ip
  
      # shutdown the master IP
-    master = self.cfg.GetMasterNode()
-    result = self.rpc.call_node_deactivate_master_ip(master)
+    master_params = self.cfg.GetMasterNetworkParameters()
+    ems = self.cfg.GetUseExternalMipScript()
+    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+                                                     master_params, ems)
      result.Raise("Could not disable the master role")
  
      try:
        cluster = self.cfg.GetClusterInfo()
        cluster.cluster_name = clustername
-      cluster.master_ip = ip
+      cluster.master_ip = new_ip
        self.cfg.Update(cluster, feedback_fn)
  
        # update the known hosts file
        ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
        node_list = self.cfg.GetOnlineNodeList()
        try:
-        node_list.remove(master)
+        node_list.remove(master_params.name)
        except ValueError:
          pass
        _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
      finally:
-      result = self.rpc.call_node_activate_master_ip(master)
+      master_params.ip = new_ip
+      result = self.rpc.call_node_activate_master_ip(master_params.name,
+                                                     master_params, ems)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not re-enable the master role on"
@@ -3675,6 +3748,9 @@ class LUClusterSetParams(LogicalUnit):
      if self.op.reserved_lvs is not None:
        self.cluster.reserved_lvs = self.op.reserved_lvs
  
+    if self.op.use_external_mip_script is not None:
+      self.cluster.use_external_mip_script = self.op.use_external_mip_script
+
      def helper_os(aname, mods, desc):
        desc += " OS list"
        lst = getattr(self.cluster, aname)
@@ -3699,33 +3775,40 @@ class LUClusterSetParams(LogicalUnit):
        helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
  
      if self.op.master_netdev:
-      master = self.cfg.GetMasterNode()
+      master_params = self.cfg.GetMasterNetworkParameters()
+      ems = self.cfg.GetUseExternalMipScript()
        feedback_fn("Shutting down master ip on the current netdev (%s)" %
                    self.cluster.master_netdev)
-      result = self.rpc.call_node_deactivate_master_ip(master)
+      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+                                                       master_params, ems)
        result.Raise("Could not disable the master ip")
        feedback_fn("Changing master_netdev from %s to %s" %
-                  (self.cluster.master_netdev, self.op.master_netdev))
+                  (master_params.netdev, self.op.master_netdev))
        self.cluster.master_netdev = self.op.master_netdev
  
      if self.op.master_netmask:
-      master = self.cfg.GetMasterNode()
+      master_params = self.cfg.GetMasterNetworkParameters()
        feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
-      result = self.rpc.call_node_change_master_netmask(master,
-                                                        self.op.master_netmask)
+      result = self.rpc.call_node_change_master_netmask(master_params.name,
+                                                        master_params.netmask,
+                                                        self.op.master_netmask,
+                                                        master_params.ip,
+                                                        master_params.netdev)
        if result.fail_msg:
          msg = "Could not change the master IP netmask: %s" % result.fail_msg
-        self.LogWarning(msg)
          feedback_fn(msg)
-      else:
-        self.cluster.master_netmask = self.op.master_netmask
+
+      self.cluster.master_netmask = self.op.master_netmask
  
      self.cfg.Update(self.cluster, feedback_fn)
  
      if self.op.master_netdev:
+      master_params = self.cfg.GetMasterNetworkParameters()
        feedback_fn("Starting the master ip on the new master netdev (%s)" %
                    self.op.master_netdev)
-      result = self.rpc.call_node_activate_master_ip(master)
+      ems = self.cfg.GetUseExternalMipScript()
+      result = self.rpc.call_node_activate_master_ip(master_params.name,
+                                                     master_params, ems)
        if result.fail_msg:
          self.LogWarning("Could not re-enable the master ip on"
                          " the master, please restart manually: %s",
@@ -3758,6 +3841,9 @@ def _ComputeAncillaryFiles(cluster, redist):
      constants.SSH_KNOWN_HOSTS_FILE,
      constants.CONFD_HMAC_KEY,
      constants.CLUSTER_DOMAIN_SECRET_FILE,
+    constants.SPICE_CERT_FILE,
+    constants.SPICE_CACERT_FILE,
+    constants.RAPI_USERS_FILE,
      ])
  
    if not redist:
@@ -3770,27 +3856,43 @@ def _ComputeAncillaryFiles(cluster, redist):
    if cluster.modify_etc_hosts:
      files_all.add(constants.ETC_HOSTS)
  
-  # Files which must either exist on all nodes or on none
-  files_all_opt = set([
+  # Files which are optional, these must:
+  # - be present in one other category as well
+  # - either exist or not exist on all nodes of that category (mc, vm all)
+  files_opt = set([
      constants.RAPI_USERS_FILE,
      ])
  
    # Files which should only be on master candidates
    files_mc = set()
+
    if not redist:
      files_mc.add(constants.CLUSTER_CONF_FILE)
  
+    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
+    # replication
+    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
+
    # Files which should only be on VM-capable nodes
    files_vm = set(filename
      for hv_name in cluster.enabled_hypervisors
-    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
+    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
  
-  # Filenames must be unique
-  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
-          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
+  files_opt |= set(filename
+    for hv_name in cluster.enabled_hypervisors
+    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
+
+  # Filenames in each category must be unique
+  all_files_set = files_all | files_mc | files_vm
+  assert (len(all_files_set) ==
+          sum(map(len, [files_all, files_mc, files_vm]))), \
           "Found file listed in more than one file list"
  
-  return (files_all, files_all_opt, files_mc, files_vm)
+  # Optional files must be present in one other category
+  assert all_files_set.issuperset(files_opt), \
+         "Optional file not in a different required list"
+
+  return (files_all, files_opt, files_mc, files_vm)
  
  
  def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
@@ -3824,7 +3926,7 @@ def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
        nodelist.remove(master_info.name)
  
    # Gather file lists
-  (files_all, files_all_opt, files_mc, files_vm) = \
+  (files_all, _, files_mc, files_vm) = \
      _ComputeAncillaryFiles(cluster, True)
  
    # Never re-distribute configuration file from here
@@ -3834,7 +3936,6 @@ def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  
    filemap = [
      (online_nodes, files_all),
-    (online_nodes, files_all_opt),
      (vm_nodes, files_vm),
      ]
  
@@ -3874,8 +3975,10 @@ class LUClusterActivateMasterIp(NoHooksLU):
      """Activate the master IP.
  
      """
-    master = self.cfg.GetMasterNode()
-    self.rpc.call_node_activate_master_ip(master)
+    master_params = self.cfg.GetMasterNetworkParameters()
+    ems = self.cfg.GetUseExternalMipScript()
+    self.rpc.call_node_activate_master_ip(master_params.name,
+                                          master_params, ems)
  
  
  class LUClusterDeactivateMasterIp(NoHooksLU):
@@ -3886,8 +3989,10 @@ class LUClusterDeactivateMasterIp(NoHooksLU):
      """Deactivate the master IP.
  
      """
-    master = self.cfg.GetMasterNode()
-    self.rpc.call_node_deactivate_master_ip(master)
+    master_params = self.cfg.GetMasterNetworkParameters()
+    ems = self.cfg.GetUseExternalMipScript()
+    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
+                                            ems)
  
  
  def _WaitForSync(lu, instance, disks=None, oneshot=False):
@@ -4409,6 +4514,9 @@ class LUNodeRemove(LogicalUnit):
  
      modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
  
+    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
+      "Not owning BGL"
+
      # Promote nodes to master candidate as needed
      _AdjustCandidatePool(self, exceptions=[node.name])
      self.context.RemoveNode(node.name)
@@ -4522,6 +4630,9 @@ class LUNodeQuery(NoHooksLU):
    def ExpandNames(self):
      self.nq.ExpandNames(self)
  
+  def DeclareLocks(self, level):
+    self.nq.DeclareLocks(self, level)
+
    def Exec(self, feedback_fn):
      return self.nq.OldStyleQuery(self)
  
@@ -4540,8 +4651,9 @@ class LUNodeQueryvols(NoHooksLU):
                         selected=self.op.output_fields)
  
    def ExpandNames(self):
+    self.share_locks = _ShareAll()
      self.needed_locks = {}
-    self.share_locks[locking.LEVEL_NODE] = 1
+
      if not self.op.nodes:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      else:
@@ -4608,8 +4720,8 @@ class LUNodeQueryStorage(NoHooksLU):
                         selected=self.op.output_fields)
  
    def ExpandNames(self):
+    self.share_locks = _ShareAll()
      self.needed_locks = {}
-    self.share_locks[locking.LEVEL_NODE] = 1
  
      if self.op.nodes:
        self.needed_locks[locking.LEVEL_NODE] = \
@@ -4820,7 +4932,7 @@ class LUQuery(NoHooksLU):
    def CheckArguments(self):
      qcls = _GetQueryImplementation(self.op.what)
  
-    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
+    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
  
    def ExpandNames(self):
      self.impl.ExpandNames(self)
@@ -5075,6 +5187,9 @@ class LUNodeAdd(LogicalUnit):
      new_node = self.new_node
      node = new_node.name
  
+    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
+      "Not owning BGL"
+
      # We adding a new node so we assume it's powered
      new_node.powered = True
  
@@ -5213,35 +5328,32 @@ class LUNodeSetParams(LogicalUnit):
      self.lock_all = self.op.auto_promote and self.might_demote
      self.lock_instances = self.op.secondary_ip is not None
  
+  def _InstanceFilter(self, instance):
+    """Filter for getting affected instances.
+
+    """
+    return (instance.disk_template in constants.DTS_INT_MIRROR and
+            self.op.node_name in instance.all_nodes)
+
    def ExpandNames(self):
      if self.lock_all:
        self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
      else:
        self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
  
-    if self.lock_instances:
-      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
+    # Since modifying a node can have severe effects on currently running
+    # operations the resource lock is at least acquired in shared mode
+    self.needed_locks[locking.LEVEL_NODE_RES] = \
+      self.needed_locks[locking.LEVEL_NODE]
  
-  def DeclareLocks(self, level):
-    # If we have locked all instances, before waiting to lock nodes, release
-    # all the ones living on nodes unrelated to the current operation.
-    if level == locking.LEVEL_NODE and self.lock_instances:
-      self.affected_instances = []
-      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
-        instances_keep = []
-
-        # Build list of instances to release
-        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
-        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
-          if (instance.disk_template in constants.DTS_INT_MIRROR and
-              self.op.node_name in instance.all_nodes):
-            instances_keep.append(instance_name)
-            self.affected_instances.append(instance)
-
-        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
-
-        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
-                set(instances_keep))
+    # Get node resource and instance locks in shared mode; they are not used
+    # for anything but read-only access
+    self.share_locks[locking.LEVEL_NODE_RES] = 1
+    self.share_locks[locking.LEVEL_INSTANCE] = 1
+
+    if self.lock_instances:
+      self.needed_locks[locking.LEVEL_INSTANCE] = \
+        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -5273,6 +5385,25 @@ class LUNodeSetParams(LogicalUnit):
      """
      node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
  
+    if self.lock_instances:
+      affected_instances = \
+        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
+
+      # Verify instance locks
+      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
+      wanted_instances = frozenset(affected_instances.keys())
+      if wanted_instances - owned_instances:
+        raise errors.OpPrereqError("Instances affected by changing node %s's"
+                                   " secondary IP address have changed since"
+                                   " locks were acquired, wanted '%s', have"
+                                   " '%s'; retry the operation" %
+                                   (self.op.node_name,
+                                    utils.CommaJoin(wanted_instances),
+                                    utils.CommaJoin(owned_instances)),
+                                   errors.ECODE_STATE)
+    else:
+      affected_instances = None
+
      if (self.op.master_candidate is not None or
          self.op.drained is not None or
          self.op.offline is not None):
@@ -5362,7 +5493,9 @@ class LUNodeSetParams(LogicalUnit):
  
      if old_role == self._ROLE_OFFLINE and new_role != old_role:
        # Trying to transition out of offline status
-      result = self.rpc.call_version([node.name])[node.name]
+      # TODO: Use standard RPC runner, but make sure it works when the node is
+      # still marked offline
+      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
        if result.fail_msg:
          raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                     " to report its version: %s" %
@@ -5381,16 +5514,21 @@ class LUNodeSetParams(LogicalUnit):
          raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                     " homed cluster", errors.ECODE_INVAL)
  
+      assert not (frozenset(affected_instances) -
+                  self.owned_locks(locking.LEVEL_INSTANCE))
+
        if node.offline:
-        if self.affected_instances:
-          raise errors.OpPrereqError("Cannot change secondary ip: offline"
-                                     " node has instances (%s) configured"
-                                     " to use it" % self.affected_instances)
+        if affected_instances:
+          raise errors.OpPrereqError("Cannot change secondary IP address:"
+                                     " offline node has instances (%s)"
+                                     " configured to use it" %
+                                     utils.CommaJoin(affected_instances.keys()))
        else:
          # On online nodes, check that no instances are running, and that
          # the node has the new ip and we can reach it.
-        for instance in self.affected_instances:
-          _CheckInstanceDown(self, instance, "cannot change secondary ip")
+        for instance in affected_instances.values():
+          _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                              msg="cannot change secondary ip")
  
          _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
          if master.name != node.name:
@@ -5542,6 +5680,7 @@ class LUClusterQuery(NoHooksLU):
        "candidate_pool_size": cluster.candidate_pool_size,
        "master_netdev": cluster.master_netdev,
        "master_netmask": cluster.master_netmask,
+      "use_external_mip_script": cluster.use_external_mip_script,
        "volume_group_name": cluster.volume_group_name,
        "drbd_usermode_helper": cluster.drbd_usermode_helper,
        "file_storage_dir": cluster.file_storage_dir,
@@ -5787,7 +5926,7 @@ def _SafeShutdownInstanceDisks(lu, instance, disks=None):
    _ShutdownInstanceDisks.
  
    """
-  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
+  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
    _ShutdownInstanceDisks(lu, instance, disks=disks)
  
  
@@ -6025,6 +6164,8 @@ class LUInstanceStartup(LogicalUnit):
        hv_type.CheckParameterSyntax(filled_hvp)
        _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
  
+    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
+
      self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
  
      if self.primary_offline and self.op.ignore_offline_nodes:
@@ -6068,9 +6209,11 @@ class LUInstanceStartup(LogicalUnit):
  
        _StartInstanceDisks(self, instance, force)
  
-      result = self.rpc.call_instance_start(node_current, instance,
-                                            self.op.hvparams, self.op.beparams,
-                                            self.op.startup_paused)
+      result = \
+        self.rpc.call_instance_start(node_current,
+                                     (instance, self.op.hvparams,
+                                      self.op.beparams),
+                                     self.op.startup_paused)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
@@ -6120,7 +6263,7 @@ class LUInstanceReboot(LogicalUnit):
      self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
      assert self.instance is not None, \
        "Cannot retrieve locked instance %s" % self.op.instance_name
-
+    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
      _CheckNodeOnline(self, instance.primary_node)
  
      # check bridges existence
@@ -6160,8 +6303,8 @@ class LUInstanceReboot(LogicalUnit):
          self.LogInfo("Instance %s was already stopped, starting now",
                       instance.name)
        _StartInstanceDisks(self, instance, ignore_secondaries)
-      result = self.rpc.call_instance_start(node_current, instance,
-                                            None, None, False)
+      result = self.rpc.call_instance_start(node_current,
+                                            (instance, None, None), False)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
@@ -6209,6 +6352,8 @@ class LUInstanceShutdown(LogicalUnit):
      assert self.instance is not None, \
        "Cannot retrieve locked instance %s" % self.op.instance_name
  
+    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
+
      self.primary_offline = \
        self.cfg.GetNodeInfo(self.instance.primary_node).offline
  
@@ -6285,7 +6430,7 @@ class LUInstanceReinstall(LogicalUnit):
        raise errors.OpPrereqError("Instance '%s' has no disks" %
                                   self.op.instance_name,
                                   errors.ECODE_INVAL)
-    _CheckInstanceDown(self, instance, "cannot reinstall")
+    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
  
      if self.op.os_type is not None:
        # OS verification
@@ -6322,9 +6467,9 @@ class LUInstanceReinstall(LogicalUnit):
      try:
        feedback_fn("Running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
-      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
-                                             self.op.debug_level,
-                                             osparams=self.os_inst)
+      result = self.rpc.call_instance_os_add(inst.primary_node,
+                                             (inst, self.os_inst), True,
+                                             self.op.debug_level)
        result.Raise("Could not install OS for instance %s on node %s" %
                     (inst.name, inst.primary_node))
      finally:
@@ -6358,6 +6503,10 @@ class LUInstanceRecreateDisks(LogicalUnit):
        # otherwise we need to lock all nodes for disk re-creation
        primary_only = bool(self.op.nodes)
        self._LockInstancesNodes(primary_only=primary_only)
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -6404,10 +6553,12 @@ class LUInstanceRecreateDisks(LogicalUnit):
                                   self.op.instance_name, errors.ECODE_INVAL)
      # if we replace nodes *and* the old primary is offline, we don't
      # check
-    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
+    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
+    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
      old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
      if not (self.op.nodes and old_pnode.offline):
-      _CheckInstanceDown(self, instance, "cannot recreate disks")
+      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
+                          msg="cannot recreate disks")
  
      if not self.op.disks:
        self.op.disks = range(len(instance.disks))
@@ -6428,6 +6579,9 @@ class LUInstanceRecreateDisks(LogicalUnit):
      """
      instance = self.instance
  
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+
      to_skip = []
      mods = [] # keeps track of needed logical_id changes
  
@@ -6510,7 +6664,8 @@ class LUInstanceRename(LogicalUnit):
      instance = self.cfg.GetInstanceInfo(self.op.instance_name)
      assert instance is not None
      _CheckNodeOnline(self, instance.primary_node)
-    _CheckInstanceDown(self, instance, "cannot rename")
+    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
+                        msg="cannot rename")
      self.instance = instance
  
      new_name = self.op.new_name
@@ -6596,11 +6751,16 @@ class LUInstanceRemove(LogicalUnit):
    def ExpandNames(self):
      self._ExpandAndLockInstance()
      self.needed_locks[locking.LEVEL_NODE] = []
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  
    def DeclareLocks(self, level):
      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -6649,6 +6809,12 @@ class LUInstanceRemove(LogicalUnit):
                                   " node %s: %s" %
                                   (instance.name, instance.primary_node, msg))
  
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+    assert not (set(instance.all_nodes) -
+                self.owned_locks(locking.LEVEL_NODE)), \
+      "Not owning correct locks"
+
      _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
  
  
@@ -6862,11 +7028,16 @@ class LUInstanceMove(LogicalUnit):
      target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.op.target_node = target_node
      self.needed_locks[locking.LEVEL_NODE] = [target_node]
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
  
    def DeclareLocks(self, level):
      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes(primary_only=True)
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -6924,7 +7095,7 @@ class LUInstanceMove(LogicalUnit):
      _CheckNodeNotDrained(self, target_node)
      _CheckNodeVmCapable(self, target_node)
  
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
        # check memory requirements on the secondary node
        _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                             instance.name, bep[constants.BE_MEMORY],
@@ -6951,6 +7122,9 @@ class LUInstanceMove(LogicalUnit):
      self.LogInfo("Shutting down instance %s on source node %s",
                   instance.name, source_node)
  
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+
      result = self.rpc.call_instance_shutdown(source_node, instance,
                                               self.op.shutdown_timeout)
      msg = result.fail_msg
@@ -7015,7 +7189,7 @@ class LUInstanceMove(LogicalUnit):
      _RemoveDisks(self, instance, target_node=source_node)
  
      # Only start the instance if it's marked as up
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
        self.LogInfo("Starting instance %s on node %s",
                     instance.name, target_node)
  
@@ -7025,8 +7199,8 @@ class LUInstanceMove(LogicalUnit):
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Can't activate the instance's disks")
  
-      result = self.rpc.call_instance_start(target_node, instance,
-                                            None, None, False)
+      result = self.rpc.call_instance_start(target_node,
+                                            (instance, None, None), False)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
@@ -7153,10 +7327,11 @@ class TLMigrateInstance(Tasklet):
      assert instance is not None
      self.instance = instance
  
-    if (not self.cleanup and not instance.admin_up and not self.failover and
-        self.fallback):
-      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
-                      " to failover")
+    if (not self.cleanup and
+        not instance.admin_state == constants.ADMINST_UP and
+        not self.failover and self.fallback):
+      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
+                      " switching to failover")
        self.failover = True
  
      if instance.disk_template not in constants.DTS_MIRRORED:
@@ -7215,7 +7390,7 @@ class TLMigrateInstance(Tasklet):
      i_be = self.cfg.GetClusterInfo().FillBE(instance)
  
      # check memory requirements on the secondary node
-    if not self.failover or instance.admin_up:
+    if not self.failover or instance.admin_state == constants.ADMINST_UP:
        _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                             instance.name, i_be[constants.BE_MEMORY],
                             instance.hypervisor)
@@ -7632,7 +7807,7 @@ class TLMigrateInstance(Tasklet):
      source_node = instance.primary_node
      target_node = self.target_node
  
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
        self.feedback_fn("* checking disk consistency between source and target")
        for dev in instance.disks:
          # for drbd, these are drbd over lvm
@@ -7675,7 +7850,7 @@ class TLMigrateInstance(Tasklet):
      self.cfg.Update(instance, self.feedback_fn)
  
      # Only start the instance if it's marked as up
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
        self.feedback_fn("* activating the instance's disks on target node %s" %
                         target_node)
        logging.info("Starting instance %s on node %s",
@@ -7689,7 +7864,7 @@ class TLMigrateInstance(Tasklet):
  
        self.feedback_fn("* starting the instance on the target node %s" %
                         target_node)
-      result = self.rpc.call_instance_start(target_node, instance, None, None,
+      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                              False)
        msg = result.fail_msg
        if msg:
@@ -7821,7 +7996,7 @@ def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
    shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
    dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                            logical_id=(vgnames[0], names[0]))
-  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
+  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                            logical_id=(vgnames[1], names[1]))
    drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                            logical_id=(primary, secondary, port,
@@ -8141,7 +8316,7 @@ def _ComputeDiskSizePerVG(disk_template, disks):
      constants.DT_DISKLESS: {},
      constants.DT_PLAIN: _compute(disks, 0),
      # 128 MB are added for drbd metadata for each disk
-    constants.DT_DRBD8: _compute(disks, 128),
+    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
      constants.DT_FILE: {},
      constants.DT_SHARED_FILE: {},
    }
@@ -8162,7 +8337,8 @@ def _ComputeDiskSize(disk_template, disks):
      constants.DT_DISKLESS: None,
      constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
      # 128 MB are added for drbd metadata for each disk
-    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
+    constants.DT_DRBD8:
+      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
      constants.DT_FILE: None,
      constants.DT_SHARED_FILE: 0,
      constants.DT_BLOCK: 0,
@@ -8208,9 +8384,11 @@ def _CheckHVParams(lu, nodenames, hvname, hvparams):
  
    """
    nodenames = _FilterVmNodes(lu, nodenames)
-  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
-                                                  hvname,
-                                                  hvparams)
+
+  cluster = lu.cfg.GetClusterInfo()
+  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
+
+  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
    for node in nodenames:
      info = hvinfo[node]
      if info.offline:
@@ -8236,7 +8414,7 @@ def _CheckOSParams(lu, required, nodenames, osname, osparams):
  
    """
    nodenames = _FilterVmNodes(lu, nodenames)
-  result = lu.rpc.call_os_validate(required, nodenames, osname,
+  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                     [constants.OS_VALIDATE_PARAMETERS],
                                     osparams)
    for node, nres in result.items():
@@ -8429,7 +8607,11 @@ class LUInstanceCreate(LogicalUnit):
      self.add_locks[locking.LEVEL_INSTANCE] = instance_name
  
      if self.op.iallocator:
+      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
+      # specifying a group on instance creation and then selecting nodes from
+      # that group
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
        nodelist = [self.op.pnode]
@@ -8437,6 +8619,9 @@ class LUInstanceCreate(LogicalUnit):
          self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
          nodelist.append(self.op.snode)
        self.needed_locks[locking.LEVEL_NODE] = nodelist
+      # Lock resources of instance's primary and secondary nodes (copy to
+      # prevent accidential modification)
+      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
  
      # in case of import lock the source node too
      if self.op.mode == constants.INSTANCE_IMPORT:
@@ -9043,6 +9228,10 @@ class LUInstanceCreate(LogicalUnit):
      instance = self.op.instance_name
      pnode_name = self.pnode.name
  
+    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
+                self.owned_locks(locking.LEVEL_NODE)), \
+      "Node locks differ from node resource locks"
+
      ht_kind = self.op.hypervisor
      if ht_kind in constants.HTS_REQ_PORT:
        network_port = self.cfg.AllocatePort()
@@ -9063,7 +9252,7 @@ class LUInstanceCreate(LogicalUnit):
                              primary_node=pnode_name,
                              nics=self.nics, disks=disks,
                              disk_template=self.op.disk_template,
-                            admin_up=False,
+                            admin_state=constants.ADMINST_DOWN,
                              network_port=network_port,
                              beparams=self.op.beparams,
                              hvparams=self.op.hvparams,
@@ -9145,6 +9334,9 @@ class LUInstanceCreate(LogicalUnit):
        raise errors.OpExecError("There are some degraded disks for"
                                 " this instance")
  
+    # Release all node resource locks
+    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
+
      if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
        if self.op.mode == constants.INSTANCE_CREATE:
          if not self.op.no_install:
@@ -9162,7 +9354,7 @@ class LUInstanceCreate(LogicalUnit):
            feedback_fn("* running the instance OS create scripts...")
            # FIXME: pass debug option from opcode to backend
            os_add_result = \
-            self.rpc.call_instance_os_add(pnode_name, iobj, False,
+            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                            self.op.debug_level)
            if pause_sync:
              feedback_fn("* resuming disk sync")
@@ -9237,13 +9429,15 @@ class LUInstanceCreate(LogicalUnit):
          raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                       % self.op.mode)
  
+    assert not self.owned_locks(locking.LEVEL_NODE_RES)
+
      if self.op.start:
-      iobj.admin_up = True
+      iobj.admin_state = constants.ADMINST_UP
        self.cfg.Update(iobj, feedback_fn)
        logging.info("Starting instance %s on node %s", instance, pnode_name)
        feedback_fn("* starting instance...")
-      result = self.rpc.call_instance_start(pnode_name, iobj,
-                                            None, None, False)
+      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
+                                            False)
        result.Raise("Could not start instance")
  
      return list(iobj.all_nodes)
@@ -9260,6 +9454,7 @@ class LUInstanceConsole(NoHooksLU):
    REQ_BGL = False
  
    def ExpandNames(self):
+    self.share_locks = _ShareAll()
      self._ExpandAndLockInstance()
  
    def CheckPrereq(self):
@@ -9285,10 +9480,12 @@ class LUInstanceConsole(NoHooksLU):
      node_insts.Raise("Can't get node information from %s" % node)
  
      if instance.name not in node_insts.payload:
-      if instance.admin_up:
+      if instance.admin_state == constants.ADMINST_UP:
          state = constants.INSTST_ERRORDOWN
-      else:
+      elif instance.admin_state == constants.ADMINST_DOWN:
          state = constants.INSTST_ADMINDOWN
+      else:
+        state = constants.INSTST_ADMINOFFLINE
        raise errors.OpExecError("Instance %s is not running (state %s)" %
                                 (instance.name, state))
  
@@ -9334,6 +9531,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
      self._ExpandAndLockInstance()
  
      assert locking.LEVEL_NODE not in self.needed_locks
+    assert locking.LEVEL_NODE_RES not in self.needed_locks
      assert locking.LEVEL_NODEGROUP not in self.needed_locks
  
      assert self.op.iallocator is None or self.op.remote_node is None, \
@@ -9356,6 +9554,8 @@ class LUInstanceReplaceDisks(LogicalUnit):
          # iallocator will select a new node in the same group
          self.needed_locks[locking.LEVEL_NODEGROUP] = []
  
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
+
      self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                     self.op.iallocator, self.op.remote_node,
                                     self.op.disks, False, self.op.early_release)
@@ -9369,6 +9569,8 @@ class LUInstanceReplaceDisks(LogicalUnit):
        assert not self.needed_locks[locking.LEVEL_NODEGROUP]
  
        self.share_locks[locking.LEVEL_NODEGROUP] = 1
+      # Lock all groups used by instance optimistically; this requires going
+      # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          self.cfg.GetInstanceNodeGroups(self.op.instance_name)
  
@@ -9383,6 +9585,10 @@ class LUInstanceReplaceDisks(LogicalUnit):
            for node_name in self.cfg.GetNodeGroup(group_uuid).members]
        else:
          self._LockInstancesNodes()
+    elif level == locking.LEVEL_NODE_RES:
+      # Reuse node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -9419,6 +9625,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
      assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
              self.op.iallocator is None)
  
+    # Verify if node group locks are still correct
      owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
      if owned_groups:
        _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
@@ -9677,8 +9884,9 @@ class TLReplaceDisks(Tasklet):
                                                            self.target_node]
                                if node_name is not None)
  
-    # Release unneeded node locks
+    # Release unneeded node and node resource locks
      _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
  
      # Release any owned node group
      if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
@@ -9707,6 +9915,8 @@ class TLReplaceDisks(Tasklet):
        assert set(owned_nodes) == set(self.node_secondary_ip), \
            ("Incorrect node locks, owning %s, expected %s" %
             (owned_nodes, self.node_secondary_ip.keys()))
+      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
+              self.lu.owned_locks(locking.LEVEL_NODE_RES))
  
        owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
        assert list(owned_instances) == [self.instance_name], \
@@ -9722,7 +9932,7 @@ class TLReplaceDisks(Tasklet):
      feedback_fn("Replacing disk(s) %s for %s" %
                  (utils.CommaJoin(self.disks), self.instance.name))
  
-    activate_disks = (not self.instance.admin_up)
+    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
  
      # Activate the instance disks if we're replacing them on a down instance
      if activate_disks:
@@ -9742,9 +9952,11 @@ class TLReplaceDisks(Tasklet):
        if activate_disks:
          _SafeShutdownInstanceDisks(self.lu, self.instance)
  
+    assert not self.lu.owned_locks(locking.LEVEL_NODE)
+
      if __debug__:
        # Verify owned locks
-      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
+      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
        nodes = frozenset(self.node_secondary_ip)
        assert ((self.early_release and not owned_nodes) or
                (not self.early_release and not (set(owned_nodes) - nodes))), \
@@ -9827,7 +10039,7 @@ class TLReplaceDisks(Tasklet):
        lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                               logical_id=(vg_data, names[0]))
        vg_meta = dev.children[1].logical_id[0]
-      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
+      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                               logical_id=(vg_meta, names[1]))
  
        new_lvs = [lv_data, lv_meta]
@@ -9984,10 +10196,18 @@ class TLReplaceDisks(Tasklet):
        self.lu.LogStep(cstep, steps_total, "Removing old storage")
        cstep += 1
        self._RemoveOldStorage(self.target_node, iv_names)
-      # WARNING: we release both node locks here, do not do other RPCs
-      # than WaitForSync to the primary node
-      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
-                    names=[self.target_node, self.other_node])
+      # TODO: Check if releasing locks early still makes sense
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
+    else:
+      # Release all resource locks except those used by the instance
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
+                    keep=self.node_secondary_ip.keys())
+
+    # Release all node locks while waiting for sync
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
+
+    # TODO: Can the instance lock be downgraded here? Take the optional disk
+    # shutdown in the caller into consideration.
  
      # Wait for sync
      # This can fail as the old devices are degraded and _WaitForSync
@@ -10121,6 +10341,9 @@ class TLReplaceDisks(Tasklet):
  
      self.cfg.Update(self.instance, feedback_fn)
  
+    # Release all node locks (the configuration has been updated)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
+
      # and now perform the drbd attach
      self.lu.LogInfo("Attaching primary drbds to new secondary"
                      " (standalone => connected)")
@@ -10142,12 +10365,15 @@ class TLReplaceDisks(Tasklet):
        self.lu.LogStep(cstep, steps_total, "Removing old storage")
        cstep += 1
        self._RemoveOldStorage(self.target_node, iv_names)
-      # WARNING: we release all node locks here, do not do other RPCs
-      # than WaitForSync to the primary node
-      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
-                    names=[self.instance.primary_node,
-                           self.target_node,
-                           self.new_node])
+      # TODO: Check if releasing locks early still makes sense
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
+    else:
+      # Release all resource locks except those used by the instance
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
+                    keep=self.node_secondary_ip.keys())
+
+    # TODO: Can the instance lock be downgraded here? Take the optional disk
+    # shutdown in the caller into consideration.
  
      # Wait for sync
      # This can fail as the old devices are degraded and _WaitForSync
@@ -10207,7 +10433,7 @@ class LURepairNodeStorage(NoHooksLU):
      """
      # Check whether any instance on this node has faulty disks
      for inst in _GetNodeInstances(self.cfg, self.op.node_name):
-      if not inst.admin_up:
+      if inst.admin_state != constants.ADMINST_UP:
          continue
        check_nodes = set(inst.all_nodes)
        check_nodes.discard(self.op.node_name)
@@ -10457,11 +10683,16 @@ class LUInstanceGrowDisk(LogicalUnit):
    def ExpandNames(self):
      self._ExpandAndLockInstance()
      self.needed_locks[locking.LEVEL_NODE] = []
-    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
+    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
  
    def DeclareLocks(self, level):
      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -10518,10 +10749,18 @@ class LUInstanceGrowDisk(LogicalUnit):
      instance = self.instance
      disk = self.disk
  
+    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+
      disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
      if not disks_ok:
        raise errors.OpExecError("Cannot activate block device to grow")
  
+    feedback_fn("Growing disk %s of instance '%s' by %s" %
+                (self.op.disk, instance.name,
+                 utils.FormatUnit(self.op.amount, "h")))
+
      # First run all grow ops in dry-run mode
      for node in instance.all_nodes:
        self.cfg.SetDiskID(disk, node)
@@ -10544,18 +10783,28 @@ class LUInstanceGrowDisk(LogicalUnit):
  
      disk.RecordGrow(self.op.amount)
      self.cfg.Update(instance, feedback_fn)
+
+    # Changes have been recorded, release node lock
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+
+    # Downgrade lock while waiting for sync
+    self.glm.downgrade(locking.LEVEL_INSTANCE)
+
      if self.op.wait_for_sync:
        disk_abort = not _WaitForSync(self, instance, disks=[disk])
        if disk_abort:
          self.proc.LogWarning("Disk sync-ing has not returned a good"
                               " status; please check the instance")
-      if not instance.admin_up:
+      if instance.admin_state != constants.ADMINST_UP:
          _SafeShutdownInstanceDisks(self, instance, disks=[disk])
-    elif not instance.admin_up:
+    elif instance.admin_state != constants.ADMINST_UP:
        self.proc.LogWarning("Not shutting down the disk even if the instance is"
                             " not supposed to be running because no wait for"
                             " sync mode was requested")
  
+    assert self.owned_locks(locking.LEVEL_NODE_RES)
+    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
+
  
  class LUInstanceQueryData(NoHooksLU):
    """Query runtime instance data.
@@ -10687,19 +10936,17 @@ class LUInstanceQueryData(NoHooksLU):
          if remote_info and "state" in remote_info:
            remote_state = "up"
          else:
-          remote_state = "down"
-
-      if instance.admin_up:
-        config_state = "up"
-      else:
-        config_state = "down"
+          if instance.admin_state == constants.ADMINST_UP:
+            remote_state = "down"
+          else:
+            remote_state = instance.admin_state
  
        disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                    instance.disks)
  
        result[instance.name] = {
          "name": instance.name,
-        "config_state": config_state,
+        "config_state": instance.admin_state,
          "run_state": remote_state,
          "pnode": instance.primary_node,
          "snodes": instance.secondary_nodes,
@@ -10735,7 +10982,8 @@ class LUInstanceSetParams(LogicalUnit):
  
    def CheckArguments(self):
      if not (self.op.nics or self.op.disks or self.op.disk_template or
-            self.op.hvparams or self.op.beparams or self.op.os_name):
+            self.op.hvparams or self.op.beparams or self.op.os_name or
+            self.op.online_inst or self.op.offline_inst):
        raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
  
      if self.op.hvparams:
@@ -10851,7 +11099,10 @@ class LUInstanceSetParams(LogicalUnit):
  
    def ExpandNames(self):
      self._ExpandAndLockInstance()
+    # Can't even acquire node locks in shared mode as upcoming changes in
+    # Ganeti 2.6 will start to modify the node object on disk conversion
      self.needed_locks[locking.LEVEL_NODE] = []
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  
    def DeclareLocks(self, level):
@@ -10860,6 +11111,10 @@ class LUInstanceSetParams(LogicalUnit):
        if self.op.disk_template and self.op.remote_node:
          self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
          self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
+    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -10954,7 +11209,8 @@ class LUInstanceSetParams(LogicalUnit):
                                     " %s to %s" % (instance.disk_template,
                                                    self.op.disk_template),
                                     errors.ECODE_INVAL)
-      _CheckInstanceDown(self, instance, "cannot change disk template")
+      _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                          msg="cannot change disk template")
        if self.op.disk_template in constants.DTS_INT_MIRROR:
          if self.op.remote_node == pnode:
            raise errors.OpPrereqError("Given new secondary node %s is the same"
@@ -11178,7 +11434,8 @@ class LUInstanceSetParams(LogicalUnit):
          if len(instance.disks) == 1:
            raise errors.OpPrereqError("Cannot remove the last disk of"
                                       " an instance", errors.ECODE_INVAL)
-        _CheckInstanceDown(self, instance, "cannot remove disks")
+        _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                            msg="cannot remove disks")
  
        if (disk_op == constants.DDM_ADD and
            len(instance.disks) >= constants.MAX_DISKS):
@@ -11193,7 +11450,15 @@ class LUInstanceSetParams(LogicalUnit):
                                       (disk_op, len(instance.disks)),
                                       errors.ECODE_INVAL)
  
-    return
+    # disabling the instance
+    if self.op.offline_inst:
+      _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                          msg="cannot change instance state to offline")
+
+    # enabling the instance
+    if self.op.online_inst:
+      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
+                          msg="cannot make instance go online")
  
    def _ConvertPlainToDrbd(self, feedback_fn):
      """Converts an instance from plain to drbd.
@@ -11204,6 +11469,8 @@ class LUInstanceSetParams(LogicalUnit):
      pnode = instance.primary_node
      snode = self.op.remote_node
  
+    assert instance.disk_template == constants.DT_PLAIN
+
      # create a fake disk info for _GenerateDiskTemplate
      disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                    constants.IDISK_VG: d.logical_id[0]}
@@ -11240,6 +11507,9 @@ class LUInstanceSetParams(LogicalUnit):
      instance.disks = new_disks
      self.cfg.Update(instance, feedback_fn)
  
+    # Release node locks while waiting for sync
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+
      # disks are created, waiting for sync
      disk_abort = not _WaitForSync(self, instance,
                                    oneshot=not self.op.wait_for_sync)
@@ -11247,12 +11517,17 @@ class LUInstanceSetParams(LogicalUnit):
        raise errors.OpExecError("There are some degraded disks for"
                                 " this instance, please cleanup manually")
  
+    # Node resource locks will be released by caller
+
    def _ConvertDrbdToPlain(self, feedback_fn):
      """Converts an instance from drbd to plain.
  
      """
      instance = self.instance
+
      assert len(instance.secondary_nodes) == 1
+    assert instance.disk_template == constants.DT_DRBD8
+
      pnode = instance.primary_node
      snode = instance.secondary_nodes[0]
      feedback_fn("Converting template to plain")
@@ -11270,6 +11545,9 @@ class LUInstanceSetParams(LogicalUnit):
      instance.disk_template = constants.DT_PLAIN
      self.cfg.Update(instance, feedback_fn)
  
+    # Release locks in case removing disks takes a while
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+
      feedback_fn("Removing volumes on the secondary node...")
      for disk in old_disks:
        self.cfg.SetDiskID(disk, snode)
@@ -11287,6 +11565,8 @@ class LUInstanceSetParams(LogicalUnit):
          self.LogWarning("Could not remove metadata for disk %d on node %s,"
                          " continuing anyway: %s", idx, pnode, msg)
  
+    # Node resource locks will be released by caller
+
    def Exec(self, feedback_fn):
      """Modifies an instance.
  
@@ -11298,6 +11578,10 @@ class LUInstanceSetParams(LogicalUnit):
      for warn in self.warn:
        feedback_fn("WARNING: %s" % warn)
  
+    assert ((self.op.disk_template is None) ^
+            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
+      "Not owning any node resource locks"
+
      result = []
      instance = self.instance
      # disk changes
@@ -11355,6 +11639,16 @@ class LUInstanceSetParams(LogicalUnit):
                         disk_dict[constants.IDISK_MODE]))
  
      if self.op.disk_template:
+      if __debug__:
+        check_nodes = set(instance.all_nodes)
+        if self.op.remote_node:
+          check_nodes.add(self.op.remote_node)
+        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
+          owned = self.owned_locks(level)
+          assert not (check_nodes - owned), \
+            ("Not owning the correct locks, owning %r, expected at least %r" %
+             (owned, check_nodes))
+
        r_shut = _ShutdownInstanceDisks(self, instance)
        if not r_shut:
          raise errors.OpExecError("Cannot shutdown instance disks, unable to"
@@ -11367,6 +11661,15 @@ class LUInstanceSetParams(LogicalUnit):
          raise
        result.append(("disk_template", self.op.disk_template))
  
+      assert instance.disk_template == self.op.disk_template, \
+        ("Expected disk template '%s', found '%s'" %
+         (self.op.disk_template, instance.disk_template))
+
+    # Release node and resource locks if there are any (they might already have
+    # been released during disk conversion)
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
+
      # NIC changes
      for nic_op, nic_dict in self.op.nics:
        if nic_op == constants.DDM_REMOVE:
@@ -11417,8 +11720,20 @@ class LUInstanceSetParams(LogicalUnit):
        for key, val in self.op.osparams.iteritems():
          result.append(("os/%s" % key, val))
  
+    # online/offline instance
+    if self.op.online_inst:
+      self.cfg.MarkInstanceDown(instance.name)
+      result.append(("admin_state", constants.ADMINST_DOWN))
+    if self.op.offline_inst:
+      self.cfg.MarkInstanceOffline(instance.name)
+      result.append(("admin_state", constants.ADMINST_OFFLINE))
+
      self.cfg.Update(instance, feedback_fn)
  
+    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
+                self.owned_locks(locking.LEVEL_NODE)), \
+      "All node locks should have been released by now"
+
      return result
  
    _DISK_CONVERSIONS = {
@@ -11741,7 +12056,8 @@ class LUBackupExport(LogicalUnit):
            "Cannot retrieve locked instance %s" % self.op.instance_name
      _CheckNodeOnline(self, self.instance.primary_node)
  
-    if (self.op.remove_instance and self.instance.admin_up and
+    if (self.op.remove_instance and
+        self.instance.admin_state == constants.ADMINST_UP and
          not self.op.shutdown):
        raise errors.OpPrereqError("Can not remove instance without shutting it"
                                   " down before")
@@ -11871,7 +12187,7 @@ class LUBackupExport(LogicalUnit):
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, src_node)
  
-    activate_disks = (not instance.admin_up)
+    activate_disks = (instance.admin_state != constants.ADMINST_UP)
  
      if activate_disks:
        # Activate the instance disks if we'exporting a stopped instance
@@ -11884,12 +12200,13 @@ class LUBackupExport(LogicalUnit):
  
        helper.CreateSnapshots()
        try:
-        if (self.op.shutdown and instance.admin_up and
+        if (self.op.shutdown and
+            instance.admin_state == constants.ADMINST_UP and
              not self.op.remove_instance):
            assert not activate_disks
            feedback_fn("Starting instance %s" % instance.name)
-          result = self.rpc.call_instance_start(src_node, instance,
-                                                None, None, False)
+          result = self.rpc.call_instance_start(src_node,
+                                                (instance, None, None), False)
            msg = result.fail_msg
            if msg:
              feedback_fn("Failed to start instance: %s" % msg)
@@ -13021,9 +13338,9 @@ class IAllocator(object):
    # pylint: disable=R0902
    # lots of instance attributes
  
-  def __init__(self, cfg, rpc, mode, **kwargs):
+  def __init__(self, cfg, rpc_runner, mode, **kwargs):
      self.cfg = cfg
-    self.rpc = rpc
+    self.rpc = rpc_runner
      # init buffer variables
      self.in_text = self.out_text = self.in_data = self.out_data = None
      # init all input fields so that pylint is happy
@@ -13187,7 +13504,7 @@ class IAllocator(object):
              i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
              remote_info["memory_free"] -= max(0, i_mem_diff)
  
-            if iinfo.admin_up:
+            if iinfo.admin_state == constants.ADMINST_UP:
                i_p_up_mem += beinfo[constants.BE_MEMORY]
  
          # compute memory used by instances
@@ -13227,7 +13544,7 @@ class IAllocator(object):
          nic_data.append(nic_dict)
        pir = {
          "tags": list(iinfo.GetTags()),
-        "admin_up": iinfo.admin_up,
+        "admin_state": iinfo.admin_state,
          "vcpus": beinfo[constants.BE_VCPUS],
          "memory": beinfo[constants.BE_MEMORY],
          "os": iinfo.os,