Transition into and out of offline instance state
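
This change introduces a third administrative instance state, "offline", alongside the existing up/down pair: the boolean admin_up field gives way to admin_state, prerequisite checks are expressed against lists of allowed states, and LUInstanceSetParams gains offline_inst/online_inst flags for moving instances into and out of the new state. A minimal sketch of the transitions the new prerequisite checks permit (the ADMINST_* constant values are assumed here, they are not shown in this diff):

    # Sketch only; assumes ADMINST_UP/DOWN/OFFLINE == "up"/"down"/"offline".
    ALLOWED_TRANSITIONS = {
      "offline": ["down"],           # OpInstanceSetParams(online_inst=True)
      "down": ["offline", "up"],     # offline_inst=True / OpInstanceStartup
      "up": ["down"],                # OpInstanceShutdown
    }
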
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 00233c1..57883c1 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -67,6 +67,13 @@ import ganeti.masterd.instance # pylint: disable=W0611
 #: Size of DRBD meta block device
 DRBD_META_SIZE = 128
 
+# States of instance
+INSTANCE_UP = [constants.ADMINST_UP]
+INSTANCE_DOWN = [constants.ADMINST_DOWN]
+INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
+INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
+INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
+
 
 class ResultWithJobs:
   """Data container for LU results with jobs.
@@ -721,12 +728,17 @@ def _ReleaseLocks(lu, level, names=None, keep=None):
   else:
     should_release = None
 
-  if should_release:
+  owned = lu.owned_locks(level)
+  if not owned:
+    # Not owning any lock at this level, do nothing
+    pass
+
+  elif should_release:
     retain = []
     release = []
 
     # Determine which locks to release
-    for name in lu.owned_locks(level):
+    for name in owned:
       if should_release(name):
         release.append(name)
       else:
@@ -898,20 +910,32 @@ def _GetClusterDomainSecret():
                                strict=True)
 
 
-def _CheckInstanceDown(lu, instance, reason):
-  """Ensure that an instance is not running."""
-  if instance.admin_up:
-    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
-                               (instance.name, reason), errors.ECODE_STATE)
+def _CheckInstanceState(lu, instance, req_states, msg=None):
+  """Ensure that an instance is in one of the required states.
+
+  @param lu: the LU on behalf of which we make the check
+  @param instance: the instance to check
+  @param req_states: the admin states the instance is required to be in
+  @param msg: if passed, should be a message to replace the default one
+  @raise errors.OpPrereqError: if the instance is not in the required state
 
-  pnode = instance.primary_node
-  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
-  ins_l.Raise("Can't contact node %s for instance information" % pnode,
-              prereq=True, ecode=errors.ECODE_ENVIRON)
+  """
+  if msg is None:
+    msg = "can't use instance from outside %s states" % ", ".join(req_states)
+  if instance.admin_state not in req_states:
+    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
+                               (instance.name, instance.admin_state, msg),
+                               errors.ECODE_STATE)
+
+  if constants.ADMINST_UP not in req_states:
+    pnode = instance.primary_node
+    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
+    ins_l.Raise("Can't contact node %s for instance information" % pnode,
+                prereq=True, ecode=errors.ECODE_ENVIRON)
 
-  if instance.name in ins_l.payload:
-    raise errors.OpPrereqError("Instance %s is running, %s" %
-                               (instance.name, reason), errors.ECODE_STATE)
+    if instance.name in ins_l.payload:
+      raise errors.OpPrereqError("Instance %s is running, %s" %
+                                 (instance.name, msg), errors.ECODE_STATE)
 
 
 def _ExpandItemName(fn, name, kind):
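
_CheckInstanceState generalises the old _CheckInstanceDown: callers name the admin states they can work with, and whenever ADMINST_UP is not among them the helper additionally asks the primary node whether the instance is really stopped. A sketch of a typical call, mirroring the call sites further down:

    # Inside some LU's CheckPrereq (sketch): refuse to act unless the
    # instance is administratively down and not running on its primary node.
    _CheckInstanceState(self, instance, INSTANCE_DOWN,
                        msg="cannot change disk template")
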
@@ -956,8 +979,8 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   @param secondary_nodes: list of secondary nodes as strings
   @type os_type: string
   @param os_type: the name of the instance's OS
-  @type status: boolean
-  @param status: the should_run status of the instance
+  @type status: string
+  @param status: the desired status of the instance
   @type memory: string
   @param memory: the memory size of the instance
   @type vcpus: string
@@ -981,17 +1004,13 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   @return: the hook environment for this instance
 
   """
-  if status:
-    str_status = "up"
-  else:
-    str_status = "down"
   env = {
     "OP_TARGET": name,
     "INSTANCE_NAME": name,
     "INSTANCE_PRIMARY": primary_node,
     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
     "INSTANCE_OS_TYPE": os_type,
-    "INSTANCE_STATUS": str_status,
+    "INSTANCE_STATUS": status,
     "INSTANCE_MEMORY": memory,
     "INSTANCE_VCPUS": vcpus,
     "INSTANCE_DISK_TEMPLATE": disk_template,
@@ -1083,7 +1102,7 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
     "primary_node": instance.primary_node,
     "secondary_nodes": instance.secondary_nodes,
     "os_type": instance.os,
-    "status": instance.admin_up,
+    "status": instance.admin_state,
     "memory": bep[constants.BE_MEMORY],
     "vcpus": bep[constants.BE_VCPUS],
     "nics": _NICListToTuple(lu, instance.nics),
@@ -1364,8 +1383,9 @@ class LUClusterDestroy(LogicalUnit):
     # Run post hooks on master node before it's removed
     _RunPostHook(self, master_params.name)
 
+    ems = self.cfg.GetUseExternalMipScript()
     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
-                                                     master_params)
+                                                     master_params, ems)
     result.Raise("Could not disable the master role")
 
     return master_params.name
@@ -1941,6 +1961,26 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
 
+  def _VerifyNodeUserScripts(self, ninfo, nresult):
+    """Check the results of user scripts presence and executability on the node
+
+    @type ninfo: L{objects.Node}
+    @param ninfo: the node to check
+    @param nresult: the remote results for the node
+
+    """
+    node = ninfo.name
+
+    test = not constants.NV_USERSCRIPTS in nresult
+    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
+                  "did not return user scripts information")
+
+    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
+    if not test:
+      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
+                    "user scripts not present or not executable: %s" %
+                    utils.CommaJoin(sorted(broken_scripts)))
+
   def _VerifyNodeNetwork(self, ninfo, nresult):
     """Check the node network connectivity results.
 
@@ -2007,7 +2047,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                  "volume %s missing on node %s", volume, node)
 
-    if instanceconfig.admin_up:
+    if instanceconfig.admin_state == constants.ADMINST_UP:
       pri_img = node_image[node_current]
       test = instance not in pri_img.instances and not pri_img.offline
       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
@@ -2023,12 +2063,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       # node here
       snode = node_image[nname]
       bad_snode = snode.ghost or snode.offline
-      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
+      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
+               not success and not bad_snode,
                constants.CV_EINSTANCEFAULTYDISK, instance,
                "couldn't retrieve status for disk/%s on %s: %s",
                idx, nname, bdev_status)
-      _ErrorIf((instanceconfig.admin_up and success and
-                bdev_status.ldisk_status == constants.LDS_FAULTY),
+      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
+                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
                constants.CV_EINSTANCEFAULTYDISK, instance,
                "disk/%s on %s is faulty", idx, nname)
 
@@ -2236,7 +2277,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         node_drbd[minor] = (instance, False)
       else:
         instance = instanceinfo[instance]
-        node_drbd[minor] = (instance.name, instance.admin_up)
+        node_drbd[minor] = (instance.name,
+                            instance.admin_state == constants.ADMINST_UP)
 
     # and now check them
     used_minors = nresult.get(constants.NV_DRBDLIST, [])
@@ -2634,6 +2676,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     i_non_redundant = [] # Non redundant instances
     i_non_a_balanced = [] # Non auto-balanced instances
+    i_offline = 0 # Count of offline instances
     n_offline = 0 # Count of offline nodes
     n_drained = 0 # Count of nodes being drained
     node_vol_should = {}
@@ -2649,6 +2692,10 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
 
+    user_scripts = []
+    if self.cfg.GetUseExternalMipScript():
+      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
+
     node_verify_param = {
       constants.NV_FILELIST:
         utils.UniqueSequence(filename
@@ -2671,6 +2718,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       constants.NV_MASTERIP: (master_node, master_ip),
       constants.NV_OSLIST: None,
       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
+      constants.NV_USERSCRIPTS: user_scripts,
       }
 
     if vg_name is not None:
@@ -2829,6 +2877,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       nimg.call_ok = self._VerifyNode(node_i, nresult)
       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
       self._VerifyNodeNetwork(node_i, nresult)
+      self._VerifyNodeUserScripts(node_i, nresult)
       self._VerifyOob(node_i, nresult)
 
       if nimg.vm_capable:
@@ -2853,6 +2902,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
 
         for inst in non_primary_inst:
+          # FIXME: investigate best way to handle offline insts
+          inst_cfg = self.all_inst_info.get(inst, None)
+          if inst_cfg and inst_cfg.admin_state == constants.ADMINST_OFFLINE:
+            if verbose:
+              feedback_fn("* Skipping offline instance %s" % inst)
+            i_offline += 1
+            continue
           test = inst in self.all_inst_info
           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                    "instance should not run on node %s", node_i.name)
@@ -2878,7 +2933,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                constants.CV_ENODERPC, pnode, "instance %s, connection to"
                " primary node failed", instance)
 
-      _ErrorIf(inst_config.admin_up and pnode_img.offline,
+      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
+               pnode_img.offline,
                constants.CV_EINSTANCEBADNODE, instance,
                "instance is marked as running and lives on offline node %s",
                inst_config.primary_node)
@@ -2970,6 +3026,9 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                   % len(i_non_a_balanced))
 
+    if i_offline:
+      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
+
     if n_offline:
       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
 
@@ -3132,8 +3191,8 @@ class LUGroupVerifyDisks(NoHooksLU):
     res_missing = {}
 
     nv_dict = _MapInstanceDisksToNodes([inst
-                                        for inst in self.instances.values()
-                                        if inst.admin_up])
+            for inst in self.instances.values()
+            if inst.admin_state == constants.ADMINST_UP])
 
     if nv_dict:
       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
@@ -3174,21 +3233,21 @@ class LUClusterRepairDiskSizes(NoHooksLU):
     if self.op.instances:
       self.wanted_names = _GetWantedInstances(self, self.op.instances)
       self.needed_locks = {
-        locking.LEVEL_NODE: [],
+        locking.LEVEL_NODE_RES: [],
         locking.LEVEL_INSTANCE: self.wanted_names,
         }
-      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
     else:
       self.wanted_names = None
       self.needed_locks = {
-        locking.LEVEL_NODE: locking.ALL_SET,
+        locking.LEVEL_NODE_RES: locking.ALL_SET,
         locking.LEVEL_INSTANCE: locking.ALL_SET,
         }
     self.share_locks = _ShareAll()
 
   def DeclareLocks(self, level):
-    if level == locking.LEVEL_NODE and self.wanted_names is not None:
-      self._LockInstancesNodes(primary_only=True)
+    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
+      self._LockInstancesNodes(primary_only=True, level=level)
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -3239,6 +3298,11 @@ class LUClusterRepairDiskSizes(NoHooksLU):
       for idx, disk in enumerate(instance.disks):
         per_node_disks[pnode].append((instance, idx, disk))
 
+    assert not (frozenset(per_node_disks.keys()) -
+                self.owned_locks(locking.LEVEL_NODE_RES)), \
+      "Not owning correct locks"
+    assert not self.owned_locks(locking.LEVEL_NODE)
+
     changed = []
     for node, dskl in per_node_disks.items():
       newl = [v[2].Copy() for v in dskl]
@@ -3332,8 +3396,9 @@ class LUClusterRename(LogicalUnit):
 
     # shutdown the master IP
     master_params = self.cfg.GetMasterNetworkParameters()
+    ems = self.cfg.GetUseExternalMipScript()
     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
-                                                     master_params)
+                                                     master_params, ems)
     result.Raise("Could not disable the master role")
 
     try:
@@ -3353,7 +3418,7 @@ class LUClusterRename(LogicalUnit):
     finally:
       master_params.ip = new_ip
       result = self.rpc.call_node_activate_master_ip(master_params.name,
-                                                     master_params)
+                                                     master_params, ems)
       msg = result.fail_msg
       if msg:
         self.LogWarning("Could not re-enable the master role on"
@@ -3711,10 +3776,11 @@ class LUClusterSetParams(LogicalUnit):
 
     if self.op.master_netdev:
       master_params = self.cfg.GetMasterNetworkParameters()
+      ems = self.cfg.GetUseExternalMipScript()
       feedback_fn("Shutting down master ip on the current netdev (%s)" %
                   self.cluster.master_netdev)
       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
-                                                       master_params)
+                                                       master_params, ems)
       result.Raise("Could not disable the master ip")
       feedback_fn("Changing master_netdev from %s to %s" %
                   (master_params.netdev, self.op.master_netdev))
@@ -3740,8 +3806,9 @@ class LUClusterSetParams(LogicalUnit):
       master_params = self.cfg.GetMasterNetworkParameters()
       feedback_fn("Starting the master ip on the new master netdev (%s)" %
                   self.op.master_netdev)
+      ems = self.cfg.GetUseExternalMipScript()
       result = self.rpc.call_node_activate_master_ip(master_params.name,
-                                                     master_params)
+                                                     master_params, ems)
       if result.fail_msg:
         self.LogWarning("Could not re-enable the master ip on"
                         " the master, please restart manually: %s",
@@ -3798,9 +3865,14 @@ def _ComputeAncillaryFiles(cluster, redist):
 
   # Files which should only be on master candidates
   files_mc = set()
+
   if not redist:
     files_mc.add(constants.CLUSTER_CONF_FILE)
 
+    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
+    # replication
+    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
+
   # Files which should only be on VM-capable nodes
   files_vm = set(filename
     for hv_name in cluster.enabled_hypervisors
@@ -3904,8 +3976,9 @@ class LUClusterActivateMasterIp(NoHooksLU):
 
     """
     master_params = self.cfg.GetMasterNetworkParameters()
+    ems = self.cfg.GetUseExternalMipScript()
     self.rpc.call_node_activate_master_ip(master_params.name,
-                                          master_params)
+                                          master_params, ems)
 
 
 class LUClusterDeactivateMasterIp(NoHooksLU):
@@ -3917,7 +3990,9 @@ class LUClusterDeactivateMasterIp(NoHooksLU):
 
     """
     master_params = self.cfg.GetMasterNetworkParameters()
-    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params)
+    ems = self.cfg.GetUseExternalMipScript()
+    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
+                                            ems)
 
 
 def _WaitForSync(lu, instance, disks=None, oneshot=False):
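
Every activate/deactivate master IP RPC now carries an extra flag saying whether the cluster is configured to use an external master IP setup script (cfg.GetUseExternalMipScript()). The same three-line pattern recurs in several LUs above; a hypothetical wrapper, shown only to make the pattern explicit:

    def _DeactivateMasterIp(lu, master_params):
      """Hypothetical helper bundling the repeated deactivate pattern."""
      ems = lu.cfg.GetUseExternalMipScript()
      return lu.rpc.call_node_deactivate_master_ip(master_params.name,
                                                   master_params, ems)
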
@@ -4555,6 +4630,9 @@ class LUNodeQuery(NoHooksLU):
   def ExpandNames(self):
     self.nq.ExpandNames(self)
 
+  def DeclareLocks(self, level):
+    self.nq.DeclareLocks(self, level)
+
   def Exec(self, feedback_fn):
     return self.nq.OldStyleQuery(self)
 
@@ -4573,8 +4651,9 @@ class LUNodeQueryvols(NoHooksLU):
                        selected=self.op.output_fields)
 
   def ExpandNames(self):
+    self.share_locks = _ShareAll()
     self.needed_locks = {}
-    self.share_locks[locking.LEVEL_NODE] = 1
+
     if not self.op.nodes:
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
     else:
@@ -4641,8 +4720,8 @@ class LUNodeQueryStorage(NoHooksLU):
                        selected=self.op.output_fields)
 
   def ExpandNames(self):
+    self.share_locks = _ShareAll()
     self.needed_locks = {}
-    self.share_locks[locking.LEVEL_NODE] = 1
 
     if self.op.nodes:
       self.needed_locks[locking.LEVEL_NODE] = \
@@ -5262,6 +5341,16 @@ class LUNodeSetParams(LogicalUnit):
     else:
       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
 
+    # Since modifying a node can have severe effects on currently running
+    # operations the resource lock is at least acquired in shared mode
+    self.needed_locks[locking.LEVEL_NODE_RES] = \
+      self.needed_locks[locking.LEVEL_NODE]
+
+    # Get node resource and instance locks in shared mode; they are not used
+    # for anything but read-only access
+    self.share_locks[locking.LEVEL_NODE_RES] = 1
+    self.share_locks[locking.LEVEL_INSTANCE] = 1
+
     if self.lock_instances:
       self.needed_locks[locking.LEVEL_INSTANCE] = \
         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
@@ -5438,7 +5527,8 @@ class LUNodeSetParams(LogicalUnit):
         # On online nodes, check that no instances are running, and that
         # the node has the new ip and we can reach it.
         for instance in affected_instances.values():
-          _CheckInstanceDown(self, instance, "cannot change secondary ip")
+          _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                              msg="cannot change secondary ip")
 
         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
         if master.name != node.name:
@@ -5836,7 +5926,7 @@ def _SafeShutdownInstanceDisks(lu, instance, disks=None):
   _ShutdownInstanceDisks.
 
   """
-  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
+  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
   _ShutdownInstanceDisks(lu, instance, disks=disks)
 
 
@@ -6074,6 +6164,8 @@ class LUInstanceStartup(LogicalUnit):
       hv_type.CheckParameterSyntax(filled_hvp)
       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
 
+    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
+
     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
 
     if self.primary_offline and self.op.ignore_offline_nodes:
@@ -6171,7 +6263,7 @@ class LUInstanceReboot(LogicalUnit):
     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
-
+    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
     _CheckNodeOnline(self, instance.primary_node)
 
     # check bridges existence
@@ -6260,6 +6352,8 @@ class LUInstanceShutdown(LogicalUnit):
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
 
+    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
+
     self.primary_offline = \
       self.cfg.GetNodeInfo(self.instance.primary_node).offline
 
@@ -6336,7 +6430,7 @@ class LUInstanceReinstall(LogicalUnit):
       raise errors.OpPrereqError("Instance '%s' has no disks" %
                                  self.op.instance_name,
                                  errors.ECODE_INVAL)
-    _CheckInstanceDown(self, instance, "cannot reinstall")
+    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
 
     if self.op.os_type is not None:
       # OS verification
@@ -6409,6 +6503,10 @@ class LUInstanceRecreateDisks(LogicalUnit):
       # otherwise we need to lock all nodes for disk re-creation
       primary_only = bool(self.op.nodes)
       self._LockInstancesNodes(primary_only=primary_only)
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -6455,10 +6553,12 @@ class LUInstanceRecreateDisks(LogicalUnit):
                                  self.op.instance_name, errors.ECODE_INVAL)
     # if we replace nodes *and* the old primary is offline, we don't
     # check
-    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
+    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
+    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
     if not (self.op.nodes and old_pnode.offline):
-      _CheckInstanceDown(self, instance, "cannot recreate disks")
+      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
+                          msg="cannot recreate disks")
 
     if not self.op.disks:
       self.op.disks = range(len(instance.disks))
@@ -6479,6 +6579,9 @@ class LUInstanceRecreateDisks(LogicalUnit):
     """
     instance = self.instance
 
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+
     to_skip = []
     mods = [] # keeps track of needed logical_id changes
 
@@ -6561,7 +6664,8 @@ class LUInstanceRename(LogicalUnit):
     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert instance is not None
     _CheckNodeOnline(self, instance.primary_node)
-    _CheckInstanceDown(self, instance, "cannot rename")
+    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
+                        msg="cannot rename")
     self.instance = instance
 
     new_name = self.op.new_name
@@ -6647,11 +6751,16 @@ class LUInstanceRemove(LogicalUnit):
   def ExpandNames(self):
     self._ExpandAndLockInstance()
     self.needed_locks[locking.LEVEL_NODE] = []
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
 
   def DeclareLocks(self, level):
     if level == locking.LEVEL_NODE:
       self._LockInstancesNodes()
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -6700,6 +6809,12 @@ class LUInstanceRemove(LogicalUnit):
                                  " node %s: %s" %
                                  (instance.name, instance.primary_node, msg))
 
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+    assert not (set(instance.all_nodes) -
+                self.owned_locks(locking.LEVEL_NODE)), \
+      "Not owning correct locks"
+
     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
 
 
@@ -6913,11 +7028,16 @@ class LUInstanceMove(LogicalUnit):
     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
     self.op.target_node = target_node
     self.needed_locks[locking.LEVEL_NODE] = [target_node]
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
 
   def DeclareLocks(self, level):
     if level == locking.LEVEL_NODE:
       self._LockInstancesNodes(primary_only=True)
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -6975,7 +7095,7 @@ class LUInstanceMove(LogicalUnit):
     _CheckNodeNotDrained(self, target_node)
     _CheckNodeVmCapable(self, target_node)
 
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
       # check memory requirements on the secondary node
       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                            instance.name, bep[constants.BE_MEMORY],
@@ -7002,6 +7122,9 @@ class LUInstanceMove(LogicalUnit):
     self.LogInfo("Shutting down instance %s on source node %s",
                  instance.name, source_node)
 
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+
     result = self.rpc.call_instance_shutdown(source_node, instance,
                                              self.op.shutdown_timeout)
     msg = result.fail_msg
@@ -7066,7 +7189,7 @@ class LUInstanceMove(LogicalUnit):
     _RemoveDisks(self, instance, target_node=source_node)
 
     # Only start the instance if it's marked as up
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
       self.LogInfo("Starting instance %s on node %s",
                    instance.name, target_node)
 
@@ -7204,10 +7327,11 @@ class TLMigrateInstance(Tasklet):
     assert instance is not None
     self.instance = instance
 
-    if (not self.cleanup and not instance.admin_up and not self.failover and
-        self.fallback):
-      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
-                      " to failover")
+    if (not self.cleanup and
+        not instance.admin_state == constants.ADMINST_UP and
+        not self.failover and self.fallback):
+      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
+                      " switching to failover")
       self.failover = True
 
     if instance.disk_template not in constants.DTS_MIRRORED:
@@ -7266,7 +7390,7 @@ class TLMigrateInstance(Tasklet):
     i_be = self.cfg.GetClusterInfo().FillBE(instance)
 
     # check memory requirements on the secondary node
-    if not self.failover or instance.admin_up:
+    if not self.failover or instance.admin_state == constants.ADMINST_UP:
       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                            instance.name, i_be[constants.BE_MEMORY],
                            instance.hypervisor)
@@ -7683,7 +7807,7 @@ class TLMigrateInstance(Tasklet):
     source_node = instance.primary_node
     target_node = self.target_node
 
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
       self.feedback_fn("* checking disk consistency between source and target")
       for dev in instance.disks:
         # for drbd, these are drbd over lvm
@@ -7726,7 +7850,7 @@ class TLMigrateInstance(Tasklet):
     self.cfg.Update(instance, self.feedback_fn)
 
     # Only start the instance if it's marked as up
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
       self.feedback_fn("* activating the instance's disks on target node %s" %
                        target_node)
       logging.info("Starting instance %s on node %s",
@@ -8483,7 +8607,11 @@ class LUInstanceCreate(LogicalUnit):
     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
 
     if self.op.iallocator:
+      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
+      # specifying a group on instance creation and then selecting nodes from
+      # that group
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
     else:
       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
       nodelist = [self.op.pnode]
@@ -8491,6 +8619,9 @@ class LUInstanceCreate(LogicalUnit):
         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
         nodelist.append(self.op.snode)
       self.needed_locks[locking.LEVEL_NODE] = nodelist
+      # Lock resources of instance's primary and secondary nodes (copy to
+      # prevent accidential modification)
+      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
 
     # in case of import lock the source node too
     if self.op.mode == constants.INSTANCE_IMPORT:
@@ -9097,6 +9228,10 @@ class LUInstanceCreate(LogicalUnit):
     instance = self.op.instance_name
     pnode_name = self.pnode.name
 
+    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
+                self.owned_locks(locking.LEVEL_NODE)), \
+      "Node locks differ from node resource locks"
+
     ht_kind = self.op.hypervisor
     if ht_kind in constants.HTS_REQ_PORT:
       network_port = self.cfg.AllocatePort()
@@ -9117,7 +9252,7 @@ class LUInstanceCreate(LogicalUnit):
                             primary_node=pnode_name,
                             nics=self.nics, disks=disks,
                             disk_template=self.op.disk_template,
-                            admin_up=False,
+                            admin_state=constants.ADMINST_DOWN,
                             network_port=network_port,
                             beparams=self.op.beparams,
                             hvparams=self.op.hvparams,
@@ -9199,6 +9334,9 @@ class LUInstanceCreate(LogicalUnit):
       raise errors.OpExecError("There are some degraded disks for"
                                " this instance")
 
+    # Release all node resource locks
+    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
+
     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
       if self.op.mode == constants.INSTANCE_CREATE:
         if not self.op.no_install:
@@ -9291,8 +9429,10 @@ class LUInstanceCreate(LogicalUnit):
         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                      % self.op.mode)
 
+    assert not self.owned_locks(locking.LEVEL_NODE_RES)
+
     if self.op.start:
-      iobj.admin_up = True
+      iobj.admin_state = constants.ADMINST_UP
       self.cfg.Update(iobj, feedback_fn)
       logging.info("Starting instance %s on node %s", instance, pnode_name)
       feedback_fn("* starting instance...")
@@ -9314,6 +9454,7 @@ class LUInstanceConsole(NoHooksLU):
   REQ_BGL = False
 
   def ExpandNames(self):
+    self.share_locks = _ShareAll()
     self._ExpandAndLockInstance()
 
   def CheckPrereq(self):
@@ -9339,10 +9480,12 @@ class LUInstanceConsole(NoHooksLU):
     node_insts.Raise("Can't get node information from %s" % node)
 
     if instance.name not in node_insts.payload:
-      if instance.admin_up:
+      if instance.admin_state == constants.ADMINST_UP:
         state = constants.INSTST_ERRORDOWN
-      else:
+      elif instance.admin_state == constants.ADMINST_DOWN:
         state = constants.INSTST_ADMINDOWN
+      else:
+        state = constants.INSTST_ADMINOFFLINE
       raise errors.OpExecError("Instance %s is not running (state %s)" %
                                (instance.name, state))
 
@@ -9388,6 +9531,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
     self._ExpandAndLockInstance()
 
     assert locking.LEVEL_NODE not in self.needed_locks
+    assert locking.LEVEL_NODE_RES not in self.needed_locks
     assert locking.LEVEL_NODEGROUP not in self.needed_locks
 
     assert self.op.iallocator is None or self.op.remote_node is None, \
@@ -9410,6 +9554,8 @@ class LUInstanceReplaceDisks(LogicalUnit):
         # iallocator will select a new node in the same group
         self.needed_locks[locking.LEVEL_NODEGROUP] = []
 
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
+
     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                    self.op.iallocator, self.op.remote_node,
                                    self.op.disks, False, self.op.early_release)
@@ -9423,6 +9569,8 @@ class LUInstanceReplaceDisks(LogicalUnit):
       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
 
       self.share_locks[locking.LEVEL_NODEGROUP] = 1
+      # Lock all groups used by instance optimistically; this requires going
+      # via the node before it's locked, requiring verification later on
       self.needed_locks[locking.LEVEL_NODEGROUP] = \
         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
 
@@ -9437,6 +9585,10 @@ class LUInstanceReplaceDisks(LogicalUnit):
           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
       else:
         self._LockInstancesNodes()
+    elif level == locking.LEVEL_NODE_RES:
+      # Reuse node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE]
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -9473,6 +9625,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
             self.op.iallocator is None)
 
+    # Verify if node group locks are still correct
     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
     if owned_groups:
       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
@@ -9731,8 +9884,9 @@ class TLReplaceDisks(Tasklet):
                                                           self.target_node]
                               if node_name is not None)
 
-    # Release unneeded node locks
+    # Release unneeded node and node resource locks
     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
 
     # Release any owned node group
     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
@@ -9761,6 +9915,8 @@ class TLReplaceDisks(Tasklet):
       assert set(owned_nodes) == set(self.node_secondary_ip), \
           ("Incorrect node locks, owning %s, expected %s" %
            (owned_nodes, self.node_secondary_ip.keys()))
+      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
+              self.lu.owned_locks(locking.LEVEL_NODE_RES))
 
       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
       assert list(owned_instances) == [self.instance_name], \
@@ -9776,7 +9932,7 @@ class TLReplaceDisks(Tasklet):
     feedback_fn("Replacing disk(s) %s for %s" %
                 (utils.CommaJoin(self.disks), self.instance.name))
 
-    activate_disks = (not self.instance.admin_up)
+    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
 
     # Activate the instance disks if we're replacing them on a down instance
     if activate_disks:
@@ -9796,9 +9952,11 @@ class TLReplaceDisks(Tasklet):
       if activate_disks:
         _SafeShutdownInstanceDisks(self.lu, self.instance)
 
+    assert not self.lu.owned_locks(locking.LEVEL_NODE)
+
     if __debug__:
       # Verify owned locks
-      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
+      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
       nodes = frozenset(self.node_secondary_ip)
       assert ((self.early_release and not owned_nodes) or
               (not self.early_release and not (set(owned_nodes) - nodes))), \
@@ -10038,10 +10196,18 @@ class TLReplaceDisks(Tasklet):
       self.lu.LogStep(cstep, steps_total, "Removing old storage")
       cstep += 1
       self._RemoveOldStorage(self.target_node, iv_names)
-      # WARNING: we release both node locks here, do not do other RPCs
-      # than WaitForSync to the primary node
-      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
-                    names=[self.target_node, self.other_node])
+      # TODO: Check if releasing locks early still makes sense
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
+    else:
+      # Release all resource locks except those used by the instance
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
+                    keep=self.node_secondary_ip.keys())
+
+    # Release all node locks while waiting for sync
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
+
+    # TODO: Can the instance lock be downgraded here? Take the optional disk
+    # shutdown in the caller into consideration.
 
     # Wait for sync
     # This can fail as the old devices are degraded and _WaitForSync
@@ -10175,6 +10341,9 @@ class TLReplaceDisks(Tasklet):
 
     self.cfg.Update(self.instance, feedback_fn)
 
+    # Release all node locks (the configuration has been updated)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
+
     # and now perform the drbd attach
     self.lu.LogInfo("Attaching primary drbds to new secondary"
                     " (standalone => connected)")
@@ -10196,12 +10365,15 @@ class TLReplaceDisks(Tasklet):
       self.lu.LogStep(cstep, steps_total, "Removing old storage")
       cstep += 1
       self._RemoveOldStorage(self.target_node, iv_names)
-      # WARNING: we release all node locks here, do not do other RPCs
-      # than WaitForSync to the primary node
-      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
-                    names=[self.instance.primary_node,
-                           self.target_node,
-                           self.new_node])
+      # TODO: Check if releasing locks early still makes sense
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
+    else:
+      # Release all resource locks except those used by the instance
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
+                    keep=self.node_secondary_ip.keys())
+
+    # TODO: Can the instance lock be downgraded here? Take the optional disk
+    # shutdown in the caller into consideration.
 
     # Wait for sync
     # This can fail as the old devices are degraded and _WaitForSync
@@ -10261,7 +10433,7 @@ class LURepairNodeStorage(NoHooksLU):
     """
     # Check whether any instance on this node has faulty disks
     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
-      if not inst.admin_up:
+      if inst.admin_state != constants.ADMINST_UP:
         continue
       check_nodes = set(inst.all_nodes)
       check_nodes.discard(self.op.node_name)
@@ -10511,11 +10683,16 @@ class LUInstanceGrowDisk(LogicalUnit):
   def ExpandNames(self):
     self._ExpandAndLockInstance()
     self.needed_locks[locking.LEVEL_NODE] = []
-    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
+    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
 
   def DeclareLocks(self, level):
     if level == locking.LEVEL_NODE:
       self._LockInstancesNodes()
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -10572,10 +10749,18 @@ class LUInstanceGrowDisk(LogicalUnit):
     instance = self.instance
     disk = self.disk
 
+    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+
     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
     if not disks_ok:
       raise errors.OpExecError("Cannot activate block device to grow")
 
+    feedback_fn("Growing disk %s of instance '%s' by %s" %
+                (self.op.disk, instance.name,
+                 utils.FormatUnit(self.op.amount, "h")))
+
     # First run all grow ops in dry-run mode
     for node in instance.all_nodes:
       self.cfg.SetDiskID(disk, node)
@@ -10598,18 +10783,28 @@ class LUInstanceGrowDisk(LogicalUnit):
 
     disk.RecordGrow(self.op.amount)
     self.cfg.Update(instance, feedback_fn)
+
+    # Changes have been recorded, release node lock
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+
+    # Downgrade lock while waiting for sync
+    self.glm.downgrade(locking.LEVEL_INSTANCE)
+
     if self.op.wait_for_sync:
       disk_abort = not _WaitForSync(self, instance, disks=[disk])
       if disk_abort:
         self.proc.LogWarning("Disk sync-ing has not returned a good"
                              " status; please check the instance")
-      if not instance.admin_up:
+      if instance.admin_state != constants.ADMINST_UP:
         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
-    elif not instance.admin_up:
+    elif instance.admin_state != constants.ADMINST_UP:
       self.proc.LogWarning("Not shutting down the disk even if the instance is"
                            " not supposed to be running because no wait for"
                            " sync mode was requested")
 
+    assert self.owned_locks(locking.LEVEL_NODE_RES)
+    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
+
 
 class LUInstanceQueryData(NoHooksLU):
   """Query runtime instance data.
@@ -10741,19 +10936,17 @@ class LUInstanceQueryData(NoHooksLU):
         if remote_info and "state" in remote_info:
           remote_state = "up"
         else:
-          remote_state = "down"
-
-      if instance.admin_up:
-        config_state = "up"
-      else:
-        config_state = "down"
+          if instance.admin_state == constants.ADMINST_UP:
+            remote_state = "down"
+          else:
+            remote_state = instance.admin_state
 
       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                   instance.disks)
 
       result[instance.name] = {
         "name": instance.name,
-        "config_state": config_state,
+        "config_state": instance.admin_state,
         "run_state": remote_state,
         "pnode": instance.primary_node,
         "snodes": instance.secondary_nodes,
@@ -10789,7 +10982,8 @@ class LUInstanceSetParams(LogicalUnit):
 
   def CheckArguments(self):
     if not (self.op.nics or self.op.disks or self.op.disk_template or
-            self.op.hvparams or self.op.beparams or self.op.os_name):
+            self.op.hvparams or self.op.beparams or self.op.os_name or
+            self.op.online_inst or self.op.offline_inst):
       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
 
     if self.op.hvparams:
@@ -10905,7 +11099,10 @@ class LUInstanceSetParams(LogicalUnit):
 
   def ExpandNames(self):
     self._ExpandAndLockInstance()
+    # Can't even acquire node locks in shared mode as upcoming changes in
+    # Ganeti 2.6 will start to modify the node object on disk conversion
     self.needed_locks[locking.LEVEL_NODE] = []
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
 
   def DeclareLocks(self, level):
@@ -10914,6 +11111,10 @@ class LUInstanceSetParams(LogicalUnit):
       if self.op.disk_template and self.op.remote_node:
         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
+    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -11008,7 +11209,8 @@ class LUInstanceSetParams(LogicalUnit):
                                    " %s to %s" % (instance.disk_template,
                                                   self.op.disk_template),
                                    errors.ECODE_INVAL)
-      _CheckInstanceDown(self, instance, "cannot change disk template")
+      _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                          msg="cannot change disk template")
       if self.op.disk_template in constants.DTS_INT_MIRROR:
         if self.op.remote_node == pnode:
           raise errors.OpPrereqError("Given new secondary node %s is the same"
@@ -11232,7 +11434,8 @@ class LUInstanceSetParams(LogicalUnit):
         if len(instance.disks) == 1:
           raise errors.OpPrereqError("Cannot remove the last disk of"
                                      " an instance", errors.ECODE_INVAL)
-        _CheckInstanceDown(self, instance, "cannot remove disks")
+        _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                            msg="cannot remove disks")
 
       if (disk_op == constants.DDM_ADD and
           len(instance.disks) >= constants.MAX_DISKS):
@@ -11247,7 +11450,15 @@ class LUInstanceSetParams(LogicalUnit):
                                      (disk_op, len(instance.disks)),
                                      errors.ECODE_INVAL)
 
-    return
+    # disabling the instance
+    if self.op.offline_inst:
+      _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                          msg="cannot change instance state to offline")
+
+    # enabling the instance
+    if self.op.online_inst:
+      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
+                          msg="cannot make instance go online")
 
   def _ConvertPlainToDrbd(self, feedback_fn):
     """Converts an instance from plain to drbd.
@@ -11258,6 +11469,8 @@ class LUInstanceSetParams(LogicalUnit):
     pnode = instance.primary_node
     snode = self.op.remote_node
 
+    assert instance.disk_template == constants.DT_PLAIN
+
     # create a fake disk info for _GenerateDiskTemplate
     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                   constants.IDISK_VG: d.logical_id[0]}
@@ -11294,6 +11507,9 @@ class LUInstanceSetParams(LogicalUnit):
     instance.disks = new_disks
     self.cfg.Update(instance, feedback_fn)
 
+    # Release node locks while waiting for sync
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+
     # disks are created, waiting for sync
     disk_abort = not _WaitForSync(self, instance,
                                   oneshot=not self.op.wait_for_sync)
@@ -11301,12 +11517,17 @@ class LUInstanceSetParams(LogicalUnit):
       raise errors.OpExecError("There are some degraded disks for"
                                " this instance, please cleanup manually")
 
+    # Node resource locks will be released by caller
+
   def _ConvertDrbdToPlain(self, feedback_fn):
     """Converts an instance from drbd to plain.
 
     """
     instance = self.instance
+
     assert len(instance.secondary_nodes) == 1
+    assert instance.disk_template == constants.DT_DRBD8
+
     pnode = instance.primary_node
     snode = instance.secondary_nodes[0]
     feedback_fn("Converting template to plain")
@@ -11324,6 +11545,9 @@ class LUInstanceSetParams(LogicalUnit):
     instance.disk_template = constants.DT_PLAIN
     self.cfg.Update(instance, feedback_fn)
 
+    # Release locks in case removing disks takes a while
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+
     feedback_fn("Removing volumes on the secondary node...")
     for disk in old_disks:
       self.cfg.SetDiskID(disk, snode)
@@ -11341,6 +11565,8 @@ class LUInstanceSetParams(LogicalUnit):
         self.LogWarning("Could not remove metadata for disk %d on node %s,"
                         " continuing anyway: %s", idx, pnode, msg)
 
+    # Node resource locks will be released by caller
+
   def Exec(self, feedback_fn):
     """Modifies an instance.
 
@@ -11352,6 +11578,10 @@ class LUInstanceSetParams(LogicalUnit):
     for warn in self.warn:
       feedback_fn("WARNING: %s" % warn)
 
+    assert ((self.op.disk_template is None) ^
+            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
+      "Not owning any node resource locks"
+
     result = []
     instance = self.instance
     # disk changes
@@ -11409,6 +11639,16 @@ class LUInstanceSetParams(LogicalUnit):
                        disk_dict[constants.IDISK_MODE]))
 
     if self.op.disk_template:
+      if __debug__:
+        check_nodes = set(instance.all_nodes)
+        if self.op.remote_node:
+          check_nodes.add(self.op.remote_node)
+        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
+          owned = self.owned_locks(level)
+          assert not (check_nodes - owned), \
+            ("Not owning the correct locks, owning %r, expected at least %r" %
+             (owned, check_nodes))
+
       r_shut = _ShutdownInstanceDisks(self, instance)
       if not r_shut:
         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
@@ -11421,6 +11661,15 @@ class LUInstanceSetParams(LogicalUnit):
         raise
       result.append(("disk_template", self.op.disk_template))
 
+      assert instance.disk_template == self.op.disk_template, \
+        ("Expected disk template '%s', found '%s'" %
+         (self.op.disk_template, instance.disk_template))
+
+    # Release node and resource locks if there are any (they might already have
+    # been released during disk conversion)
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
+
     # NIC changes
     for nic_op, nic_dict in self.op.nics:
       if nic_op == constants.DDM_REMOVE:
@@ -11471,8 +11720,20 @@ class LUInstanceSetParams(LogicalUnit):
       for key, val in self.op.osparams.iteritems():
         result.append(("os/%s" % key, val))
 
+    # online/offline instance
+    if self.op.online_inst:
+      self.cfg.MarkInstanceDown(instance.name)
+      result.append(("admin_state", constants.ADMINST_DOWN))
+    if self.op.offline_inst:
+      self.cfg.MarkInstanceOffline(instance.name)
+      result.append(("admin_state", constants.ADMINST_OFFLINE))
+
     self.cfg.Update(instance, feedback_fn)
 
+    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
+                self.owned_locks(locking.LEVEL_NODE)), \
+      "All node locks should have been released by now"
+
     return result
 
   _DISK_CONVERSIONS = {
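
The offline transition itself is driven by the two new opcode flags: offline_inst requires the instance to already be administratively down, and online_inst moves an offline instance back to the ordinary down state, from where a normal startup is possible. A sketch of the corresponding opcodes, assuming the usual client plumbing for submitting them (the instance name is illustrative):

    # Park an instance offline, later bring it back (sketch).
    op_offline = opcodes.OpInstanceSetParams(instance_name="inst1.example.com",
                                             offline_inst=True)
    op_online = opcodes.OpInstanceSetParams(instance_name="inst1.example.com",
                                            online_inst=True)
    # After online_inst the admin state is ADMINST_DOWN again, so the
    # instance can be started with a regular OpInstanceStartup.
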
@@ -11795,7 +12056,8 @@ class LUBackupExport(LogicalUnit):
           "Cannot retrieve locked instance %s" % self.op.instance_name
     _CheckNodeOnline(self, self.instance.primary_node)
 
-    if (self.op.remove_instance and self.instance.admin_up and
+    if (self.op.remove_instance and
+        self.instance.admin_state == constants.ADMINST_UP and
         not self.op.shutdown):
       raise errors.OpPrereqError("Can not remove instance without shutting it"
                                  " down before")
@@ -11925,7 +12187,7 @@ class LUBackupExport(LogicalUnit):
     for disk in instance.disks:
       self.cfg.SetDiskID(disk, src_node)
 
-    activate_disks = (not instance.admin_up)
+    activate_disks = (instance.admin_state != constants.ADMINST_UP)
 
     if activate_disks:
       # Activate the instance disks if we'exporting a stopped instance
@@ -11938,7 +12200,8 @@ class LUBackupExport(LogicalUnit):
 
       helper.CreateSnapshots()
       try:
-        if (self.op.shutdown and instance.admin_up and
+        if (self.op.shutdown and
+            instance.admin_state == constants.ADMINST_UP and
             not self.op.remove_instance):
           assert not activate_disks
           feedback_fn("Starting instance %s" % instance.name)
@@ -13241,7 +13504,7 @@ class IAllocator(object):
             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
             remote_info["memory_free"] -= max(0, i_mem_diff)
 
-            if iinfo.admin_up:
+            if iinfo.admin_state == constants.ADMINST_UP:
               i_p_up_mem += beinfo[constants.BE_MEMORY]
 
         # compute memory used by instances
@@ -13281,7 +13544,7 @@ class IAllocator(object):
         nic_data.append(nic_dict)
       pir = {
         "tags": list(iinfo.GetTags()),
-        "admin_up": iinfo.admin_up,
+        "admin_state": iinfo.admin_state,
         "vcpus": beinfo[constants.BE_VCPUS],
         "memory": beinfo[constants.BE_MEMORY],
         "os": iinfo.os,