Allow per-hypervisor optional files
[ganeti-local] / lib / cmdlib.py
index ccc520a..daec49d 100644 (file)
@@ -21,7 +21,7 @@
 
 """Module implementing the master-side code."""
 
-# pylint: disable-msg=W0201,C0302
+# pylint: disable=W0201,C0302
 
 # W0201 since most LU attributes are defined in CheckPrereq or similar
 # functions
@@ -40,6 +40,7 @@ import socket
 import tempfile
 import shutil
 import itertools
+import operator
 
 from ganeti import ssh
 from ganeti import utils
@@ -59,20 +60,7 @@ from ganeti import qlang
 from ganeti import opcodes
 from ganeti import ht
 
-import ganeti.masterd.instance # pylint: disable-msg=W0611
-
-
-def _SupportsOob(cfg, node):
-  """Tells if node supports OOB.
-
-  @type cfg: L{config.ConfigWriter}
-  @param cfg: The cluster configuration
-  @type node: L{objects.Node}
-  @param node: The node
-  @return: The OOB script if supported or an empty string otherwise
-
-  """
-  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
+import ganeti.masterd.instance # pylint: disable=W0611
 
 
 class ResultWithJobs:
@@ -131,6 +119,8 @@ class LogicalUnit(object):
     self.op = op
     self.cfg = context.cfg
     self.glm = context.glm
+    # readability alias
+    self.owned_locks = context.glm.list_owned
     self.context = context
     self.rpc = rpc
     # Dicts used to declare locking needs to mcpu
@@ -141,10 +131,10 @@ class LogicalUnit(object):
     # Used to force good behavior when calling helper functions
     self.recalculate_locks = {}
     # logging
-    self.Log = processor.Log # pylint: disable-msg=C0103
-    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
-    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
-    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
+    self.Log = processor.Log # pylint: disable=C0103
+    self.LogWarning = processor.LogWarning # pylint: disable=C0103
+    self.LogInfo = processor.LogInfo # pylint: disable=C0103
+    self.LogStep = processor.LogStep # pylint: disable=C0103
     # support for dry-run
     self.dry_run_result = None
     # support for generic debug attribute
@@ -332,7 +322,7 @@ class LogicalUnit(object):
     """
     # API must be kept, thus we ignore the unused argument and could
     # be a function warnings
-    # pylint: disable-msg=W0613,R0201
+    # pylint: disable=W0613,R0201
     return lu_result
 
   def _ExpandAndLockInstance(self):
@@ -386,8 +376,8 @@ class LogicalUnit(object):
     # future we might want to have different behaviors depending on the value
     # of self.recalculate_locks[locking.LEVEL_NODE]
     wanted_nodes = []
-    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
-      instance = self.context.cfg.GetInstanceInfo(instance_name)
+    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
+    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
       wanted_nodes.append(instance.primary_node)
       if not primary_only:
         wanted_nodes.extend(instance.secondary_nodes)
@@ -400,7 +390,7 @@ class LogicalUnit(object):
     del self.recalculate_locks[locking.LEVEL_NODE]
 
 
-class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
+class NoHooksLU(LogicalUnit): # pylint: disable=W0223
   """Simple LU which runs no hooks.
 
   This LU is intended as a parent for other LogicalUnits which will
@@ -500,7 +490,7 @@ class _QueryBase:
 
     """
     if self.do_locking:
-      names = lu.glm.list_owned(lock_level)
+      names = lu.owned_locks(lock_level)
     else:
       names = all_names
 
@@ -560,6 +550,76 @@ class _QueryBase:
                                     sort_by_name=self.sort_by_name)
 
 
+def _ShareAll():
+  """Returns a dict declaring all lock levels shared.
+
+  """
+  return dict.fromkeys(locking.LEVELS, 1)
+
+
+def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
+  """Checks if the owned node groups are still correct for an instance.
+
+  @type cfg: L{config.ConfigWriter}
+  @param cfg: The cluster configuration
+  @type instance_name: string
+  @param instance_name: Instance name
+  @type owned_groups: set or frozenset
+  @param owned_groups: List of currently owned node groups
+
+  """
+  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
+
+  if not owned_groups.issuperset(inst_groups):
+    raise errors.OpPrereqError("Instance %s's node groups changed since"
+                               " locks were acquired, current groups are"
+                               " are '%s', owning groups '%s'; retry the"
+                               " operation" %
+                               (instance_name,
+                                utils.CommaJoin(inst_groups),
+                                utils.CommaJoin(owned_groups)),
+                               errors.ECODE_STATE)
+
+  return inst_groups
+
+
+def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
+  """Checks if the instances in a node group are still correct.
+
+  @type cfg: L{config.ConfigWriter}
+  @param cfg: The cluster configuration
+  @type group_uuid: string
+  @param group_uuid: Node group UUID
+  @type owned_instances: set or frozenset
+  @param owned_instances: List of currently owned instances
+
+  """
+  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
+  if owned_instances != wanted_instances:
+    raise errors.OpPrereqError("Instances in node group '%s' changed since"
+                               " locks were acquired, wanted '%s', have '%s';"
+                               " retry the operation" %
+                               (group_uuid,
+                                utils.CommaJoin(wanted_instances),
+                                utils.CommaJoin(owned_instances)),
+                               errors.ECODE_STATE)
+
+  return wanted_instances
+
+
+def _SupportsOob(cfg, node):
+  """Tells if node supports OOB.
+
+  @type cfg: L{config.ConfigWriter}
+  @param cfg: The cluster configuration
+  @type node: L{objects.Node}
+  @param node: The node
+  @return: The OOB script if supported or an empty string otherwise
+
+  """
+  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
+
+
 def _GetWantedNodes(lu, nodes):
   """Returns list of checked and expanded node names.
 
@@ -657,18 +717,18 @@ def _ReleaseLocks(lu, level, names=None, keep=None):
     release = []
 
     # Determine which locks to release
-    for name in lu.glm.list_owned(level):
+    for name in lu.owned_locks(level):
       if should_release(name):
         release.append(name)
       else:
         retain.append(name)
 
-    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
+    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 
     # Release just some locks
     lu.glm.release(level, names=release)
 
-    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
+    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
   else:
     # Release everything
     lu.glm.release(level)
@@ -676,6 +736,19 @@ def _ReleaseLocks(lu, level, names=None, keep=None):
     assert not lu.glm.is_owned(level), "No locks should be owned"
 
 
+def _MapInstanceDisksToNodes(instances):
+  """Creates a map from (node, volume) to instance name.
+
+  @type instances: list of L{objects.Instance}
+  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
+
+  """
+  return dict(((node, vol), inst.name)
+              for inst in instances
+              for (node, vols) in inst.MapLVsByNode().items()
+              for vol in vols)
+
+
 def _RunPostHook(lu, node_name):
   """Runs the post-hook for an opcode on a single node.
 
@@ -684,7 +757,7 @@ def _RunPostHook(lu, node_name):
   try:
     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
   except:
-    # pylint: disable-msg=W0702
+    # pylint: disable=W0702
     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 
 
@@ -997,24 +1070,24 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
   bep = cluster.FillBE(instance)
   hvp = cluster.FillHV(instance)
   args = {
-    'name': instance.name,
-    'primary_node': instance.primary_node,
-    'secondary_nodes': instance.secondary_nodes,
-    'os_type': instance.os,
-    'status': instance.admin_up,
-    'memory': bep[constants.BE_MEMORY],
-    'vcpus': bep[constants.BE_VCPUS],
-    'nics': _NICListToTuple(lu, instance.nics),
-    'disk_template': instance.disk_template,
-    'disks': [(disk.size, disk.mode) for disk in instance.disks],
-    'bep': bep,
-    'hvp': hvp,
-    'hypervisor_name': instance.hypervisor,
-    'tags': instance.tags,
+    "name": instance.name,
+    "primary_node": instance.primary_node,
+    "secondary_nodes": instance.secondary_nodes,
+    "os_type": instance.os,
+    "status": instance.admin_up,
+    "memory": bep[constants.BE_MEMORY],
+    "vcpus": bep[constants.BE_VCPUS],
+    "nics": _NICListToTuple(lu, instance.nics),
+    "disk_template": instance.disk_template,
+    "disks": [(disk.size, disk.mode) for disk in instance.disks],
+    "bep": bep,
+    "hvp": hvp,
+    "hypervisor_name": instance.hypervisor,
+    "tags": instance.tags,
   }
   if override:
     args.update(override)
-  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
+  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
 
 
 def _AdjustCandidatePool(lu, exceptions):
@@ -1076,9 +1149,13 @@ def _CheckOSVariant(os_obj, name):
   @param name: OS name passed by the user, to check for validity
 
   """
+  variant = objects.OS.GetVariant(name)
   if not os_obj.supported_variants:
+    if variant:
+      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
+                                 " passed)" % (os_obj.name, variant),
+                                 errors.ECODE_INVAL)
     return
-  variant = objects.OS.GetVariant(name)
   if not variant:
     raise errors.OpPrereqError("OS name must include a variant",
                                errors.ECODE_INVAL)
@@ -1176,6 +1253,29 @@ def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
                                  " iallocator")
 
 
+def _GetDefaultIAllocator(cfg, iallocator):
+  """Decides on which iallocator to use.
+
+  @type cfg: L{config.ConfigWriter}
+  @param cfg: Cluster configuration object
+  @type iallocator: string or None
+  @param iallocator: Iallocator specified in opcode
+  @rtype: string
+  @return: Iallocator name
+
+  """
+  if not iallocator:
+    # Use default iallocator
+    iallocator = cfg.GetDefaultIAllocator()
+
+  if not iallocator:
+    raise errors.OpPrereqError("No iallocator was specified, neither in the"
+                               " opcode nor as a cluster-wide default",
+                               errors.ECODE_INVAL)
+
+  return iallocator
+
+
 class LUClusterPostInit(LogicalUnit):
   """Logical unit for running hooks after cluster initialization.
 
@@ -1255,14 +1355,14 @@ class LUClusterDestroy(LogicalUnit):
     # Run post hooks on master node before it's removed
     _RunPostHook(self, master)
 
-    result = self.rpc.call_node_stop_master(master, False)
+    result = self.rpc.call_node_deactivate_master_ip(master)
     result.Raise("Could not disable the master role")
 
     return master
 
 
 def _VerifyCertificate(filename):
-  """Verifies a certificate for LUClusterVerifyConfig.
+  """Verifies a certificate for L{LUClusterVerifyConfig}.
 
   @type filename: string
   @param filename: Path to PEM file
@@ -1271,7 +1371,7 @@ def _VerifyCertificate(filename):
   try:
     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            utils.ReadFile(filename))
-  except Exception, err: # pylint: disable-msg=W0703
+  except Exception, err: # pylint: disable=W0703
     return (LUClusterVerifyConfig.ETYPE_ERROR,
             "Failed to load X509 certificate %s: %s" % (filename, err))
 
@@ -1386,7 +1486,7 @@ class _VerifyErrors(object):
     if args:
       msg = msg % args
     # then format the whole message
-    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
+    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
     else:
       if item:
@@ -1395,14 +1495,14 @@ class _VerifyErrors(object):
         item = ""
       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
     # and finally report it via the feedback_fn
-    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
+    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
 
   def _ErrorIf(self, cond, *args, **kwargs):
     """Log an error message if the passed condition is True.
 
     """
     cond = (bool(cond)
-            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
+            or self.op.debug_simulate_errors) # pylint: disable=E1101
     if cond:
       self._Error(*args, **kwargs)
     # do not mark the operation as failed for WARN cases only
@@ -1410,11 +1510,52 @@ class _VerifyErrors(object):
       self.bad = self.bad or cond
 
 
+class LUClusterVerify(NoHooksLU):
+  """Submits all jobs necessary to verify the cluster.
+
+  """
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    self.needed_locks = {}
+
+  def Exec(self, feedback_fn):
+    jobs = []
+
+    if self.op.group_name:
+      groups = [self.op.group_name]
+      depends_fn = lambda: None
+    else:
+      groups = self.cfg.GetNodeGroupList()
+
+      # Verify global configuration
+      jobs.append([opcodes.OpClusterVerifyConfig()])
+
+      # Always depend on global verification
+      depends_fn = lambda: [(-len(jobs), [])]
+
+    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
+                                              depends=depends_fn())]
+                for group in groups)
+
+    # Fix up all parameters
+    for op in itertools.chain(*jobs): # pylint: disable=W0142
+      op.debug_simulate_errors = self.op.debug_simulate_errors
+      op.verbose = self.op.verbose
+      op.error_codes = self.op.error_codes
+      try:
+        op.skip_checks = self.op.skip_checks
+      except AttributeError:
+        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
+
+    return ResultWithJobs(jobs)
+
+
 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
   """Verifies the cluster config.
 
   """
-  REQ_BGL = False
+  REQ_BGL = True
 
   def _VerifyHVP(self, hvp_data):
     """Verifies locally the syntax of the hypervisor parameters.
@@ -1431,6 +1572,9 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
 
   def ExpandNames(self):
+    # Information can be safely retrieved as the BGL is acquired in exclusive
+    # mode
+    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
     self.all_node_info = self.cfg.GetAllNodesInfo()
     self.all_inst_info = self.cfg.GetAllInstancesInfo()
@@ -1462,7 +1606,7 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
     feedback_fn("* Verifying all nodes belong to an existing group")
 
     # We do this verification here because, should this bogus circumstance
-    # occur, it would never be catched by VerifyGroup, which only acts on
+    # occur, it would never be caught by VerifyGroup, which only acts on
     # nodes/instances reachable from existing node groups.
 
     dangling_nodes = set(node.name for node in self.all_node_info.values()
@@ -1492,7 +1636,7 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
                   "the following instances have a non-existing primary-node:"
                   " %s", utils.CommaJoin(no_node_instances))
 
-    return (not self.bad, [g.name for g in self.all_group_info.values()])
+    return not self.bad
 
 
 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
@@ -1559,60 +1703,60 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     # This raises errors.OpPrereqError on its own:
     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
 
-    all_node_info = self.cfg.GetAllNodesInfo()
-    all_inst_info = self.cfg.GetAllInstancesInfo()
+    # Get instances in node group; this is unsafe and needs verification later
+    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
 
-    node_names = set(node.name
-                     for node in all_node_info.values()
-                     if node.group == self.group_uuid)
+    self.needed_locks = {
+      locking.LEVEL_INSTANCE: inst_names,
+      locking.LEVEL_NODEGROUP: [self.group_uuid],
+      locking.LEVEL_NODE: [],
+      }
 
-    inst_names = [inst.name
-                  for inst in all_inst_info.values()
-                  if inst.primary_node in node_names]
+    self.share_locks = _ShareAll()
 
-    # In Exec(), we warn about mirrored instances that have primary and
-    # secondary living in separate node groups. To fully verify that
-    # volumes for these instances are healthy, we will need to do an
-    # extra call to their secondaries. We ensure here those nodes will
-    # be locked.
-    for inst in inst_names:
-      if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
-        node_names.update(all_inst_info[inst].secondary_nodes)
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODE:
+      # Get members of node group; this is unsafe and needs verification later
+      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
 
-    self.needed_locks = {
-      locking.LEVEL_NODEGROUP: [self.group_uuid],
-      locking.LEVEL_NODE: list(node_names),
-      locking.LEVEL_INSTANCE: inst_names,
-    }
+      all_inst_info = self.cfg.GetAllInstancesInfo()
 
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+      # In Exec(), we warn about mirrored instances that have primary and
+      # secondary living in separate node groups. To fully verify that
+      # volumes for these instances are healthy, we will need to do an
+      # extra call to their secondaries. We ensure here those nodes will
+      # be locked.
+      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
+        # Important: access only the instances whose lock is owned
+        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
+          nodes.update(all_inst_info[inst].secondary_nodes)
 
-  def CheckPrereq(self):
-    self.all_node_info = self.cfg.GetAllNodesInfo()
-    self.all_inst_info = self.cfg.GetAllInstancesInfo()
+      self.needed_locks[locking.LEVEL_NODE] = nodes
 
-    group_nodes = set(node.name
-                      for node in self.all_node_info.values()
-                      if node.group == self.group_uuid)
+  def CheckPrereq(self):
+    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
+    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
 
-    group_instances = set(inst.name
-                          for inst in self.all_inst_info.values()
-                          if inst.primary_node in group_nodes)
+    group_nodes = set(self.group_info.members)
+    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
 
     unlocked_nodes = \
-        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
+        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
 
     unlocked_instances = \
-        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
+        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
 
     if unlocked_nodes:
-      raise errors.OpPrereqError("missing lock for nodes: %s" %
+      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                  utils.CommaJoin(unlocked_nodes))
 
     if unlocked_instances:
-      raise errors.OpPrereqError("missing lock for instances: %s" %
+      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                  utils.CommaJoin(unlocked_instances))
 
+    self.all_node_info = self.cfg.GetAllNodesInfo()
+    self.all_inst_info = self.cfg.GetAllInstancesInfo()
+
     self.my_node_names = utils.NiceSort(group_nodes)
     self.my_inst_names = utils.NiceSort(group_instances)
 
@@ -1634,7 +1778,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
             extra_lv_nodes.add(nname)
 
     unlocked_lv_nodes = \
-        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
+        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
 
     if unlocked_lv_nodes:
       raise errors.OpPrereqError("these nodes could be locked: %s" %
@@ -1657,7 +1801,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     """
     node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     # main result, nresult should be a non-empty dict
     test = not nresult or not isinstance(nresult, dict)
@@ -1726,7 +1870,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     """
     node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     ntime = nresult.get(constants.NV_TIME, None)
     try:
@@ -1759,7 +1903,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       return
 
     node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     # checks vg existence and size > 20G
     vglist = nresult.get(constants.NV_VGLIST, None)
@@ -1796,7 +1940,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       return
 
     node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     missing = nresult.get(constants.NV_BRIDGES, None)
     test = not isinstance(missing, list)
@@ -1815,7 +1959,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     """
     node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     test = constants.NV_NODELIST not in nresult
     _ErrorIf(test, self.ENODESSH, node,
@@ -1856,7 +2000,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     available on the instance's node.
 
     """
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
     node_current = instanceconfig.primary_node
 
     node_vol_should = {}
@@ -1955,7 +2099,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
   @classmethod
   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
-                   (files_all, files_all_opt, files_mc, files_vm)):
+                   (files_all, files_opt, files_mc, files_vm)):
     """Verifies file checksums collected from all nodes.
 
     @param errorif: Callback for reporting errors
@@ -1964,25 +2108,35 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     @param all_nvinfo: RPC results
 
     """
-    node_names = frozenset(node.name for node in nodeinfo)
+    # Define functions determining which nodes to consider for a file
+    files2nodefn = [
+      (files_all, None),
+      (files_mc, lambda node: (node.master_candidate or
+                               node.name == master_node)),
+      (files_vm, lambda node: node.vm_capable),
+      ]
 
-    assert master_node in node_names
-    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
-            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
-           "Found file listed in more than one file list"
+    # Build mapping from filename to list of nodes which should have the file
+    nodefiles = {}
+    for (files, fn) in files2nodefn:
+      if fn is None:
+        filenodes = nodeinfo
+      else:
+        filenodes = filter(fn, nodeinfo)
+      nodefiles.update((filename,
+                        frozenset(map(operator.attrgetter("name"), filenodes)))
+                       for filename in files)
 
-    # Define functions determining which nodes to consider for a file
-    file2nodefn = dict([(filename, fn)
-      for (files, fn) in [(files_all, None),
-                          (files_all_opt, None),
-                          (files_mc, lambda node: (node.master_candidate or
-                                                   node.name == master_node)),
-                          (files_vm, lambda node: node.vm_capable)]
-      for filename in files])
+    assert set(nodefiles) == (files_all | files_mc | files_vm)
 
-    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
+    fileinfo = dict((filename, {}) for filename in nodefiles)
+    ignore_nodes = set()
 
     for node in nodeinfo:
+      if node.offline:
+        ignore_nodes.add(node.name)
+        continue
+
       nresult = all_nvinfo[node.name]
 
       if nresult.fail_msg or not nresult.payload:
@@ -1994,13 +2148,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       errorif(test, cls.ENODEFILECHECK, node.name,
               "Node did not return file checksum data")
       if test:
+        ignore_nodes.add(node.name)
         continue
 
+      # Build per-checksum mapping from filename to nodes having it
       for (filename, checksum) in node_files.items():
-        # Check if the file should be considered for a node
-        fn = file2nodefn[filename]
-        if fn is None or fn(node):
-          fileinfo[filename].setdefault(checksum, set()).add(node.name)
+        assert filename in nodefiles
+        fileinfo[filename].setdefault(checksum, set()).add(node.name)
 
     for (filename, checksums) in fileinfo.items():
       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
@@ -2008,23 +2162,33 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       # Nodes having the file
       with_file = frozenset(node_name
                             for nodes in fileinfo[filename].values()
-                            for node_name in nodes)
+                            for node_name in nodes) - ignore_nodes
+
+      expected_nodes = nodefiles[filename] - ignore_nodes
 
       # Nodes missing file
-      missing_file = node_names - with_file
+      missing_file = expected_nodes - with_file
 
-      if filename in files_all_opt:
+      if filename in files_opt:
         # All or no nodes
-        errorif(missing_file and missing_file != node_names,
+        errorif(missing_file and missing_file != expected_nodes,
                 cls.ECLUSTERFILECHECK, None,
-                "File %s is optional, but it must exist on all or no nodes (not"
-                " found on %s)",
+                "File %s is optional, but it must exist on all or no"
+                " nodes (not found on %s)",
                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
       else:
+        # Non-optional files
         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                 "File %s is missing from node(s) %s", filename,
                 utils.CommaJoin(utils.NiceSort(missing_file)))
 
+        # Warn if a node has a file it shouldn't
+        unexpected = with_file - expected_nodes
+        errorif(unexpected,
+                cls.ECLUSTERFILECHECK, None,
+                "File %s should not exist on node(s) %s",
+                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
+
       # See if there are multiple versions of the file
       test = len(checksums) > 1
       if test:
@@ -2053,7 +2217,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     """
     node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     if drbd_helper:
       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
@@ -2112,7 +2276,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     """
     node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     remote_os = nresult.get(constants.NV_OSLIST, None)
     test = (not isinstance(remote_os, list) or
@@ -2153,7 +2317,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     """
     node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
 
@@ -2166,11 +2330,6 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
                "OS '%s' has multiple entries (first one shadows the rest): %s",
                os_name, utils.CommaJoin([v[0] for v in os_data]))
-      # this will catched in backend too
-      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
-               and not f_var, self.ENODEOS, node,
-               "OS %s with API at least %d does not declare any variant",
-               os_name, constants.OS_API_V15)
       # comparisons with the 'base' image
       test = os_name not in base.oslist
       _ErrorIf(test, self.ENODEOS, node,
@@ -2228,7 +2387,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     """
     node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     nimg.lvm_fail = True
     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
@@ -2276,7 +2435,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     """
     node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     # try to read free memory (from the hypervisor)
     hv_info = nresult.get(constants.NV_HVINFO, None)
@@ -2318,7 +2477,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         list of tuples (success, payload)
 
     """
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     node_disks = {}
     node_disks_devonly = {}
@@ -2400,6 +2559,40 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     return instdisk
 
+  @staticmethod
+  def _SshNodeSelector(group_uuid, all_nodes):
+    """Create endless iterators for all potential SSH check hosts.
+
+    """
+    nodes = [node for node in all_nodes
+             if (node.group != group_uuid and
+                 not node.offline)]
+    keyfunc = operator.attrgetter("group")
+
+    return map(itertools.cycle,
+               [sorted(map(operator.attrgetter("name"), names))
+                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
+                                                  keyfunc)])
+
+  @classmethod
+  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
+    """Choose which nodes should talk to which other nodes.
+
+    We will make nodes contact all nodes in their group, and one node from
+    every other group.
+
+    @warning: This algorithm has a known issue if one node group is much
+      smaller than others (e.g. just one node). In such a case all other
+      nodes will talk to the single node.
+
+    """
+    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
+    sel = cls._SshNodeSelector(group_uuid, all_nodes)
+
+    return (online_nodes,
+            dict((name, sorted([i.next() for i in sel]))
+                 for name in online_nodes))
+
   def BuildHooksEnv(self):
     """Build hooks env.
 
@@ -2420,17 +2613,22 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     """Build hooks nodes.
 
     """
-    assert self.my_node_names, ("Node list not gathered,"
-      " has CheckPrereq been executed?")
     return ([], self.my_node_names)
 
   def Exec(self, feedback_fn):
     """Verify integrity of the node group, performing various test on nodes.
 
     """
-    # This method has too many local variables. pylint: disable-msg=R0914
+    # This method has too many local variables. pylint: disable=R0914
+    feedback_fn("* Verifying group '%s'" % self.group_info.name)
+
+    if not self.my_node_names:
+      # empty node group
+      feedback_fn("* Empty node group, skipping verification")
+      return True
+
     self.bad = False
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
     verbose = self.op.verbose
     self._feedback_fn = feedback_fn
 
@@ -2458,25 +2656,14 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
 
     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
 
-    # We will make nodes contact all nodes in their group, and one node from
-    # every other group.
-    # TODO: should it be a *random* node, different every time?
-    online_nodes = [node.name for node in node_data_list if not node.offline]
-    other_group_nodes = {}
-
-    for name in sorted(self.all_node_info):
-      node = self.all_node_info[name]
-      if (node.group not in other_group_nodes
-          and node.group != self.group_uuid
-          and not node.offline):
-        other_group_nodes[node.group] = node.name
-
     node_verify_param = {
       constants.NV_FILELIST:
         utils.UniqueSequence(filename
                              for files in filemap
                              for filename in files),
-      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
+      constants.NV_NODELIST:
+        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
+                                  self.all_node_info.values()),
       constants.NV_HYPERVISOR: hypervisors,
       constants.NV_HVPARAMS:
         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
@@ -2565,6 +2752,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                            node_verify_param,
                                            self.cfg.GetClusterName())
+    nvinfo_endtime = time.time()
+
     if self.extra_lv_nodes and vg_name is not None:
       extra_lv_nvinfo = \
           self.rpc.call_node_verify(self.extra_lv_nodes,
@@ -2572,7 +2761,6 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                                     self.cfg.GetClusterName())
     else:
       extra_lv_nvinfo = {}
-    nvinfo_endtime = time.time()
 
     all_drbd_map = self.cfg.ComputeDRBDMap()
 
@@ -2808,9 +2996,12 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         and hook results
 
     """
-    # We only really run POST phase hooks, and are only interested in
-    # their results
-    if phase == constants.HOOKS_PHASE_POST:
+    # We only really run POST phase hooks, only for non-empty groups,
+    # and are only interested in their results
+    if not self.my_node_names:
+      # empty node group
+      pass
+    elif phase == constants.HOOKS_PHASE_POST:
       # Used to change hooks' output to proper indentation
       feedback_fn("* Hooks Results")
       assert hooks_results, "invalid result from hooks"
@@ -2822,21 +3013,19 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         self._ErrorIf(test, self.ENODEHOOKS, node_name,
                       "Communication failure in hooks execution: %s", msg)
         if res.offline or msg:
-          # No need to investigate payload if node is offline or gave an error.
-          # override manually lu_result here as _ErrorIf only
-          # overrides self.bad
-          lu_result = 1
+          # No need to investigate payload if node is offline or gave
+          # an error.
           continue
         for script, hkr, output in res.payload:
           test = hkr == constants.HKR_FAIL
           self._ErrorIf(test, self.ENODEHOOKS, node_name,
                         "Script %s failed, output:", script)
           if test:
-            output = self._HOOKS_INDENT_RE.sub('      ', output)
+            output = self._HOOKS_INDENT_RE.sub("      ", output)
             feedback_fn("%s" % output)
-            lu_result = 0
+            lu_result = False
 
-      return lu_result
+    return lu_result
 
 
 class LUClusterVerifyDisks(NoHooksLU):
@@ -2846,11 +3035,91 @@ class LUClusterVerifyDisks(NoHooksLU):
   REQ_BGL = False
 
   def ExpandNames(self):
+    self.share_locks = _ShareAll()
     self.needed_locks = {
-      locking.LEVEL_NODE: locking.ALL_SET,
-      locking.LEVEL_INSTANCE: locking.ALL_SET,
-    }
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+      locking.LEVEL_NODEGROUP: locking.ALL_SET,
+      }
+
+  def Exec(self, feedback_fn):
+    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
+
+    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
+    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
+                           for group in group_names])
+
+
+class LUGroupVerifyDisks(NoHooksLU):
+  """Verifies the status of all disks in a node group.
+
+  """
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    # Raises errors.OpPrereqError on its own if group can't be found
+    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
+
+    self.share_locks = _ShareAll()
+    self.needed_locks = {
+      locking.LEVEL_INSTANCE: [],
+      locking.LEVEL_NODEGROUP: [],
+      locking.LEVEL_NODE: [],
+      }
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_INSTANCE:
+      assert not self.needed_locks[locking.LEVEL_INSTANCE]
+
+      # Lock instances optimistically, needs verification once node and group
+      # locks have been acquired
+      self.needed_locks[locking.LEVEL_INSTANCE] = \
+        self.cfg.GetNodeGroupInstances(self.group_uuid)
+
+    elif level == locking.LEVEL_NODEGROUP:
+      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
+
+      self.needed_locks[locking.LEVEL_NODEGROUP] = \
+        set([self.group_uuid] +
+            # Lock all groups used by instances optimistically; this requires
+            # going via the node before it's locked, requiring verification
+            # later on
+            [group_uuid
+             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
+             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
+
+    elif level == locking.LEVEL_NODE:
+      # This will only lock the nodes in the group to be verified which contain
+      # actual instances
+      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+      self._LockInstancesNodes()
+
+      # Lock all nodes in group to be verified
+      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
+      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
+      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
+
+  def CheckPrereq(self):
+    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
+
+    assert self.group_uuid in owned_groups
+
+    # Check if locked instances are still correct
+    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
+    # Get instance information
+    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
+
+    # Check if node groups for locked instances are still correct
+    for (instance_name, inst) in self.instances.items():
+      assert owned_nodes.issuperset(inst.all_nodes), \
+        "Instance %s's nodes changed while we kept the lock" % instance_name
+
+      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
+                                             owned_groups)
+
+      assert self.group_uuid in inst_groups, \
+        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
 
   def Exec(self, feedback_fn):
     """Verify integrity of cluster disks.
@@ -2861,50 +3130,41 @@ class LUClusterVerifyDisks(NoHooksLU):
         missing volumes
 
     """
-    result = res_nodes, res_instances, res_missing = {}, [], {}
+    res_nodes = {}
+    res_instances = set()
+    res_missing = {}
 
-    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
-    instances = self.cfg.GetAllInstancesInfo().values()
+    nv_dict = _MapInstanceDisksToNodes([inst
+                                        for inst in self.instances.values()
+                                        if inst.admin_up])
 
-    nv_dict = {}
-    for inst in instances:
-      inst_lvs = {}
-      if not inst.admin_up:
-        continue
-      inst.MapLVsByNode(inst_lvs)
-      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
-      for node, vol_list in inst_lvs.iteritems():
-        for vol in vol_list:
-          nv_dict[(node, vol)] = inst
-
-    if not nv_dict:
-      return result
-
-    node_lvs = self.rpc.call_lv_list(nodes, [])
-    for node, node_res in node_lvs.items():
-      if node_res.offline:
-        continue
-      msg = node_res.fail_msg
-      if msg:
-        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
-        res_nodes[node] = msg
-        continue
+    if nv_dict:
+      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
+                             set(self.cfg.GetVmCapableNodeList()))
 
-      lvs = node_res.payload
-      for lv_name, (_, _, lv_online) in lvs.items():
-        inst = nv_dict.pop((node, lv_name), None)
-        if (not lv_online and inst is not None
-            and inst.name not in res_instances):
-          res_instances.append(inst.name)
+      node_lvs = self.rpc.call_lv_list(nodes, [])
 
-    # any leftover items in nv_dict are missing LVs, let's arrange the
-    # data better
-    for key, inst in nv_dict.iteritems():
-      if inst.name not in res_missing:
-        res_missing[inst.name] = []
-      res_missing[inst.name].append(key)
+      for (node, node_res) in node_lvs.items():
+        if node_res.offline:
+          continue
 
-    return result
+        msg = node_res.fail_msg
+        if msg:
+          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
+          res_nodes[node] = msg
+          continue
+
+        for lv_name, (_, _, lv_online) in node_res.payload.items():
+          inst = nv_dict.pop((node, lv_name), None)
+          if not (lv_online or inst is None):
+            res_instances.add(inst)
+
+      # any leftover items in nv_dict are missing LVs, let's arrange the data
+      # better
+      for key, inst in nv_dict.iteritems():
+        res_missing.setdefault(inst, []).append(key)
+
+    return (res_nodes, list(res_instances), res_missing)
 
 
 class LUClusterRepairDiskSizes(NoHooksLU):
@@ -2927,7 +3187,7 @@ class LUClusterRepairDiskSizes(NoHooksLU):
         locking.LEVEL_NODE: locking.ALL_SET,
         locking.LEVEL_INSTANCE: locking.ALL_SET,
         }
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+    self.share_locks = _ShareAll()
 
   def DeclareLocks(self, level):
     if level == locking.LEVEL_NODE and self.wanted_names is not None:
@@ -2940,10 +3200,10 @@ class LUClusterRepairDiskSizes(NoHooksLU):
 
     """
     if self.wanted_names is None:
-      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
+      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
 
-    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
-                             in self.wanted_names]
+    self.wanted_instances = \
+        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
 
   def _EnsureChildSizes(self, disk):
     """Ensure children of the disk have the needed disk size.
@@ -3075,7 +3335,7 @@ class LUClusterRename(LogicalUnit):
 
     # shutdown the master IP
     master = self.cfg.GetMasterNode()
-    result = self.rpc.call_node_stop_master(master, False)
+    result = self.rpc.call_node_deactivate_master_ip(master)
     result.Raise("Could not disable the master role")
 
     try:
@@ -3093,7 +3353,7 @@ class LUClusterRename(LogicalUnit):
         pass
       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
     finally:
-      result = self.rpc.call_node_start_master(master, False, False)
+      result = self.rpc.call_node_activate_master_ip(master)
       msg = result.fail_msg
       if msg:
         self.LogWarning("Could not re-enable the master role on"
@@ -3165,7 +3425,7 @@ class LUClusterSetParams(LogicalUnit):
                                    " drbd-based instances exist",
                                    errors.ECODE_INVAL)
 
-    node_list = self.glm.list_owned(locking.LEVEL_NODE)
+    node_list = self.owned_locks(locking.LEVEL_NODE)
 
     # if vg_name not None, checks given volume group on all nodes
     if self.op.vg_name:
@@ -3187,8 +3447,7 @@ class LUClusterSetParams(LogicalUnit):
     if self.op.drbd_helper:
       # checks given drbd helper on all nodes
       helpers = self.rpc.call_drbd_helper(node_list)
-      for node in node_list:
-        ninfo = self.cfg.GetNodeInfo(node)
+      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
         if ninfo.offline:
           self.LogInfo("Not checking drbd helper on offline node %s", node)
           continue
@@ -3427,7 +3686,7 @@ class LUClusterSetParams(LogicalUnit):
       master = self.cfg.GetMasterNode()
       feedback_fn("Shutting down master ip on the current netdev (%s)" %
                   self.cluster.master_netdev)
-      result = self.rpc.call_node_stop_master(master, False)
+      result = self.rpc.call_node_deactivate_master_ip(master)
       result.Raise("Could not disable the master ip")
       feedback_fn("Changing master_netdev from %s to %s" %
                   (self.cluster.master_netdev, self.op.master_netdev))
@@ -3438,7 +3697,7 @@ class LUClusterSetParams(LogicalUnit):
     if self.op.master_netdev:
       feedback_fn("Starting the master ip on the new master netdev (%s)" %
                   self.op.master_netdev)
-      result = self.rpc.call_node_start_master(master, False, False)
+      result = self.rpc.call_node_activate_master_ip(master)
       if result.fail_msg:
         self.LogWarning("Could not re-enable the master ip on"
                         " the master, please restart manually: %s",
@@ -3471,17 +3730,23 @@ def _ComputeAncillaryFiles(cluster, redist):
     constants.SSH_KNOWN_HOSTS_FILE,
     constants.CONFD_HMAC_KEY,
     constants.CLUSTER_DOMAIN_SECRET_FILE,
+    constants.RAPI_USERS_FILE,
     ])
 
   if not redist:
     files_all.update(constants.ALL_CERT_FILES)
     files_all.update(ssconf.SimpleStore().GetFileList())
+  else:
+    # we need to ship at least the RAPI certificate
+    files_all.add(constants.RAPI_CERT_FILE)
 
   if cluster.modify_etc_hosts:
     files_all.add(constants.ETC_HOSTS)
 
-  # Files which must either exist on all nodes or on none
-  files_all_opt = set([
+  # Files which are optional, these must:
+  # - be present in one other category as well
+  # - either exist or not exist on all nodes of that category (mc, vm all)
+  files_opt = set([
     constants.RAPI_USERS_FILE,
     ])
 
@@ -3493,14 +3758,23 @@ def _ComputeAncillaryFiles(cluster, redist):
   # Files which should only be on VM-capable nodes
   files_vm = set(filename
     for hv_name in cluster.enabled_hypervisors
-    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
+    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
 
-  # Filenames must be unique
-  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
-          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
+  files_opt |= set(filename
+    for hv_name in cluster.enabled_hypervisors
+    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
+
+  # Filenames in each category must be unique
+  all_files_set = files_all | files_mc | files_vm
+  assert (len(all_files_set) ==
+          sum(map(len, [files_all, files_mc, files_vm]))), \
          "Found file listed in more than one file list"
 
-  return (files_all, files_all_opt, files_mc, files_vm)
+  # Optional files must be present in one other category
+  assert all_files_set.issuperset(files_opt), \
+         "Optional file not in a different required list"
+
+  return (files_all, files_opt, files_mc, files_vm)
 
 
 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
@@ -3534,7 +3808,7 @@ def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
       nodelist.remove(master_info.name)
 
   # Gather file lists
-  (files_all, files_all_opt, files_mc, files_vm) = \
+  (files_all, _, files_mc, files_vm) = \
     _ComputeAncillaryFiles(cluster, True)
 
   # Never re-distribute configuration file from here
@@ -3544,7 +3818,6 @@ def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
 
   filemap = [
     (online_nodes, files_all),
-    (online_nodes, files_all_opt),
     (vm_nodes, files_vm),
     ]
 
@@ -3576,6 +3849,30 @@ class LUClusterRedistConf(NoHooksLU):
     _RedistributeAncillaryFiles(self)
 
 
+class LUClusterActivateMasterIp(NoHooksLU):
+  """Activate the master IP on the master node.
+
+  """
+  def Exec(self, feedback_fn):
+    """Activate the master IP.
+
+    """
+    master = self.cfg.GetMasterNode()
+    self.rpc.call_node_activate_master_ip(master)
+
+
+class LUClusterDeactivateMasterIp(NoHooksLU):
+  """Deactivate the master IP on the master node.
+
+  """
+  def Exec(self, feedback_fn):
+    """Deactivate the master IP.
+
+    """
+    master = self.cfg.GetMasterNode()
+    self.rpc.call_node_deactivate_master_ip(master)
+
+
 def _WaitForSync(lu, instance, disks=None, oneshot=False):
   """Sleep and poll for an instance's disk to sync.
 
@@ -3747,9 +4044,7 @@ class LUOobCommand(NoHooksLU):
     if self.op.command in self._SKIP_MASTER:
       assert self.master_node not in self.op.node_names
 
-    for node_name in self.op.node_names:
-      node = self.cfg.GetNodeInfo(node_name)
-
+    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
       if node is None:
         raise errors.OpPrereqError("Node %s not found" % node_name,
                                    errors.ECODE_NOENT)
@@ -3866,6 +4161,7 @@ class LUOobCommand(NoHooksLU):
       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                                utils.CommaJoin(errs))
 
+
 class _OsQuery(_QueryBase):
   FIELDS = query.OS_FIELDS
 
@@ -4073,15 +4369,12 @@ class LUNodeRemove(LogicalUnit):
     node = self.cfg.GetNodeInfo(self.op.node_name)
     assert node is not None
 
-    instance_list = self.cfg.GetInstanceList()
-
     masternode = self.cfg.GetMasterNode()
     if node.name == masternode:
       raise errors.OpPrereqError("Node is the master node, failover to another"
                                  " node is required", errors.ECODE_INVAL)
 
-    for instance_name in instance_list:
-      instance = self.cfg.GetInstanceInfo(instance_name)
+    for instance_name, instance in self.cfg.GetAllInstancesInfo():
       if node.name in instance.all_nodes:
         raise errors.OpPrereqError("Instance %s is still running on the node,"
                                    " please remove first" % instance_name,
@@ -4127,7 +4420,7 @@ class _NodeQuery(_QueryBase):
 
   def ExpandNames(self, lu):
     lu.needed_locks = {}
-    lu.share_locks[locking.LEVEL_NODE] = 1
+    lu.share_locks = _ShareAll()
 
     if self.names:
       self.wanted = _GetWantedNodes(lu, self.names)
@@ -4138,7 +4431,7 @@ class _NodeQuery(_QueryBase):
                        query.NQ_LIVE in self.requested_data)
 
     if self.do_locking:
-      # if we don't request only static fields, we need to lock the nodes
+      # If any non-static field is requested we need to lock the nodes
       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
 
   def DeclareLocks(self, lu, level):
@@ -4202,7 +4495,7 @@ class LUNodeQuery(NoHooksLU):
   """Logical unit for querying nodes.
 
   """
-  # pylint: disable-msg=W0142
+  # pylint: disable=W0142
   REQ_BGL = False
 
   def CheckArguments(self):
@@ -4242,13 +4535,11 @@ class LUNodeQueryvols(NoHooksLU):
     """Computes the list of nodes and their attributes.
 
     """
-    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
+    nodenames = self.owned_locks(locking.LEVEL_NODE)
     volumes = self.rpc.call_node_volumes(nodenames)
 
-    ilist = [self.cfg.GetInstanceInfo(iname) for iname
-             in self.cfg.GetInstanceList()]
-
-    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
+    ilist = self.cfg.GetAllInstancesInfo()
+    vol2inst = _MapInstanceDisksToNodes(ilist.values())
 
     output = []
     for node in nodenames:
@@ -4260,8 +4551,8 @@ class LUNodeQueryvols(NoHooksLU):
         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
         continue
 
-      node_vols = nresult.payload[:]
-      node_vols.sort(key=lambda vol: vol['dev'])
+      node_vols = sorted(nresult.payload,
+                         key=operator.itemgetter("dev"))
 
       for vol in node_vols:
         node_output = []
@@ -4269,22 +4560,15 @@ class LUNodeQueryvols(NoHooksLU):
           if field == "node":
             val = node
           elif field == "phys":
-            val = vol['dev']
+            val = vol["dev"]
           elif field == "vg":
-            val = vol['vg']
+            val = vol["vg"]
           elif field == "name":
-            val = vol['name']
+            val = vol["name"]
           elif field == "size":
-            val = int(float(vol['size']))
+            val = int(float(vol["size"]))
           elif field == "instance":
-            for inst in ilist:
-              if node not in lv_by_node[inst]:
-                continue
-              if vol['name'] in lv_by_node[inst][node]:
-                val = inst.name
-                break
-            else:
-              val = '-'
+            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
           else:
             raise errors.ParameterError(field)
           node_output.append(str(val))
@@ -4320,7 +4604,7 @@ class LUNodeQueryStorage(NoHooksLU):
     """Computes the list of nodes and their attributes.
 
     """
-    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
+    self.nodes = self.owned_locks(locking.LEVEL_NODE)
 
     # Always get name to sort by
     if constants.SF_NAME in self.op.output_fields:
@@ -4382,8 +4666,7 @@ class _InstanceQuery(_QueryBase):
 
   def ExpandNames(self, lu):
     lu.needed_locks = {}
-    lu.share_locks[locking.LEVEL_INSTANCE] = 1
-    lu.share_locks[locking.LEVEL_NODE] = 1
+    lu.share_locks = _ShareAll()
 
     if self.names:
       self.wanted = _GetWantedInstances(lu, self.names)
@@ -4394,17 +4677,43 @@ class _InstanceQuery(_QueryBase):
                        query.IQ_LIVE in self.requested_data)
     if self.do_locking:
       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
+      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
       lu.needed_locks[locking.LEVEL_NODE] = []
       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
 
+    self.do_grouplocks = (self.do_locking and
+                          query.IQ_NODES in self.requested_data)
+
   def DeclareLocks(self, lu, level):
-    if level == locking.LEVEL_NODE and self.do_locking:
-      lu._LockInstancesNodes() # pylint: disable-msg=W0212
+    if self.do_locking:
+      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
+        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
+
+        # Lock all groups used by instances optimistically; this requires going
+        # via the node before it's locked, requiring verification later on
+        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
+          set(group_uuid
+              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
+              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
+      elif level == locking.LEVEL_NODE:
+        lu._LockInstancesNodes() # pylint: disable=W0212
+
+  @staticmethod
+  def _CheckGroupLocks(lu):
+    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
+    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
+
+    # Check if node groups for locked instances are still correct
+    for instance_name in owned_instances:
+      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
 
   def _GetQueryData(self, lu):
     """Computes the list of instances and their attributes.
 
     """
+    if self.do_grouplocks:
+      self._CheckGroupLocks(lu)
+
     cluster = lu.cfg.GetClusterInfo()
     all_info = lu.cfg.GetAllInstancesInfo()
 
@@ -4467,22 +4776,34 @@ class _InstanceQuery(_QueryBase):
     else:
       consinfo = None
 
+    if query.IQ_NODES in self.requested_data:
+      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
+                                            instance_list)))
+      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
+      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
+                    for uuid in set(map(operator.attrgetter("group"),
+                                        nodes.values())))
+    else:
+      nodes = None
+      groups = None
+
     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                    disk_usage, offline_nodes, bad_nodes,
-                                   live_data, wrongnode_inst, consinfo)
+                                   live_data, wrongnode_inst, consinfo,
+                                   nodes, groups)
 
 
 class LUQuery(NoHooksLU):
   """Query for resources/items of a certain kind.
 
   """
-  # pylint: disable-msg=W0142
+  # pylint: disable=W0142
   REQ_BGL = False
 
   def CheckArguments(self):
     qcls = _GetQueryImplementation(self.op.what)
 
-    self.impl = qcls(self.op.filter, self.op.fields, False)
+    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
 
   def ExpandNames(self):
     self.impl.ExpandNames(self)
@@ -4498,7 +4819,7 @@ class LUQueryFields(NoHooksLU):
   """Query for resources/items of a certain kind.
 
   """
-  # pylint: disable-msg=W0142
+  # pylint: disable=W0142
   REQ_BGL = False
 
   def CheckArguments(self):
@@ -4638,9 +4959,7 @@ class LUNodeAdd(LogicalUnit):
 
     self.changed_primary_ip = False
 
-    for existing_node_name in node_list:
-      existing_node = cfg.GetNodeInfo(existing_node_name)
-
+    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
       if self.op.readd and node == existing_node_name:
         if existing_node.secondary_ip != secondary_ip:
           raise errors.OpPrereqError("Readded node doesn't have the same IP"
@@ -4747,7 +5066,7 @@ class LUNodeAdd(LogicalUnit):
     # later in the procedure; this also means that if the re-add
     # fails, we are left with a non-offlined, broken node
     if self.op.readd:
-      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
+      new_node.drained = new_node.offline = False # pylint: disable=W0201
       self.LogInfo("Readding a node, the offline/drained flags were reset")
       # if we demote the node, we do cleanup later in the procedure
       new_node.master_candidate = self.master_candidate
@@ -4793,7 +5112,7 @@ class LUNodeAdd(LogicalUnit):
 
     node_verify_list = [self.cfg.GetMasterNode()]
     node_verify_param = {
-      constants.NV_NODELIST: [node],
+      constants.NV_NODELIST: ([node], {}),
       # TODO: do a node-net-test as well?
     }
 
@@ -4895,8 +5214,8 @@ class LUNodeSetParams(LogicalUnit):
         instances_keep = []
 
         # Build list of instances to release
-        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
-          instance = self.context.cfg.GetInstanceInfo(instance_name)
+        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
+        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
           if (instance.disk_template in constants.DTS_INT_MIRROR and
               self.op.node_name in instance.all_nodes):
             instances_keep.append(instance_name)
@@ -4904,7 +5223,7 @@ class LUNodeSetParams(LogicalUnit):
 
         _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
 
-        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
+        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                 set(instances_keep))
 
   def BuildHooksEnv(self):
@@ -5523,7 +5842,7 @@ def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
   nodeinfo[node].Raise("Can't get data from node %s" % node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
-  free_mem = nodeinfo[node].payload.get('memory_free', None)
+  free_mem = nodeinfo[node].payload.get("memory_free", None)
   if not isinstance(free_mem, int):
     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                                " was '%s'" % (node, free_mem),
@@ -5698,7 +6017,8 @@ class LUInstanceStartup(LogicalUnit):
       _StartInstanceDisks(self, instance, force)
 
       result = self.rpc.call_instance_start(node_current, instance,
-                                            self.op.hvparams, self.op.beparams)
+                                            self.op.hvparams, self.op.beparams,
+                                            self.op.startup_paused)
       msg = result.fail_msg
       if msg:
         _ShutdownInstanceDisks(self, instance)
@@ -5788,7 +6108,8 @@ class LUInstanceReboot(LogicalUnit):
         self.LogInfo("Instance %s was already stopped, starting now",
                      instance.name)
       _StartInstanceDisks(self, instance, ignore_secondaries)
-      result = self.rpc.call_instance_start(node_current, instance, None, None)
+      result = self.rpc.call_instance_start(node_current, instance,
+                                            None, None, False)
       msg = result.fail_msg
       if msg:
         _ShutdownInstanceDisks(self, instance)
@@ -6053,31 +6374,44 @@ class LUInstanceRecreateDisks(LogicalUnit):
     """Recreate the disks.
 
     """
-    # change primary node, if needed
-    if self.op.nodes:
-      self.instance.primary_node = self.op.nodes[0]
-      self.LogWarning("Changing the instance's nodes, you will have to"
-                      " remove any disks left on the older nodes manually")
+    instance = self.instance
 
     to_skip = []
-    for idx, disk in enumerate(self.instance.disks):
+    mods = [] # keeps track of needed logical_id changes
+
+    for idx, disk in enumerate(instance.disks):
       if idx not in self.op.disks: # disk idx has not been passed in
         to_skip.append(idx)
         continue
       # update secondaries for disks, if needed
       if self.op.nodes:
         if disk.dev_type == constants.LD_DRBD8:
-          # need to update the nodes
+          # need to update the nodes and minors
           assert len(self.op.nodes) == 2
-          logical_id = list(disk.logical_id)
-          logical_id[0] = self.op.nodes[0]
-          logical_id[1] = self.op.nodes[1]
-          disk.logical_id = tuple(logical_id)
+          assert len(disk.logical_id) == 6 # otherwise disk internals
+                                           # have changed
+          (_, _, old_port, _, _, old_secret) = disk.logical_id
+          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
+          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
+                    new_minors[0], new_minors[1], old_secret)
+          assert len(disk.logical_id) == len(new_id)
+          mods.append((idx, new_id))
+
+    # now that we have passed all asserts above, we can apply the mods
+    # in a single run (to avoid partial changes)
+    for idx, new_id in mods:
+      instance.disks[idx].logical_id = new_id
+
+    # change primary node, if needed
+    if self.op.nodes:
+      instance.primary_node = self.op.nodes[0]
+      self.LogWarning("Changing the instance's nodes, you will have to"
+                      " remove any disks left on the older nodes manually")
 
     if self.op.nodes:
-      self.cfg.Update(self.instance, feedback_fn)
+      self.cfg.Update(instance, feedback_fn)
 
-    _CreateDisks(self, self.instance, to_skip=to_skip)
+    _CreateDisks(self, instance, to_skip=to_skip)
 
 
 class LUInstanceRename(LogicalUnit):
@@ -6158,7 +6492,7 @@ class LUInstanceRename(LogicalUnit):
     old_name = inst.name
 
     rename_file_storage = False
-    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
+    if (inst.disk_template in constants.DTS_FILEBASED and
         self.op.new_name != inst.name):
       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
       rename_file_storage = True
@@ -6292,7 +6626,7 @@ class LUInstanceQuery(NoHooksLU):
   """Logical unit for querying instances.
 
   """
-  # pylint: disable-msg=W0142
+  # pylint: disable=W0142
   REQ_BGL = False
 
   def CheckArguments(self):
@@ -6639,7 +6973,8 @@ class LUInstanceMove(LogicalUnit):
         _ShutdownInstanceDisks(self, instance)
         raise errors.OpExecError("Can't activate the instance's disks")
 
-      result = self.rpc.call_instance_start(target_node, instance, None, None)
+      result = self.rpc.call_instance_start(target_node, instance,
+                                            None, None, False)
       msg = result.fail_msg
       if msg:
         _ShutdownInstanceDisks(self, instance)
@@ -6661,7 +6996,7 @@ class LUNodeMigrate(LogicalUnit):
   def ExpandNames(self):
     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
 
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+    self.share_locks = _ShareAll()
     self.needed_locks = {
       locking.LEVEL_NODE: [self.op.node_name],
       }
@@ -6702,7 +7037,7 @@ class LUNodeMigrate(LogicalUnit):
     # running the iallocator and the actual migration, a good consistency model
     # will have to be found.
 
-    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
+    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
             frozenset([self.op.node_name]))
 
     return ResultWithJobs(jobs)
@@ -7084,6 +7419,21 @@ class TLMigrateInstance(Tasklet):
     target_node = self.target_node
     source_node = self.source_node
 
+    # Check for hypervisor version mismatch and warn the user.
+    nodeinfo = self.rpc.call_node_info([source_node, target_node],
+                                       None, self.instance.hypervisor)
+    src_info = nodeinfo[source_node]
+    dst_info = nodeinfo[target_node]
+
+    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
+        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
+      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
+      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
+      if src_version != dst_version:
+        self.feedback_fn("* warning: hypervisor version mismatch between"
+                         " source (%s) and target (%s) node" %
+                         (src_version, dst_version))
+
     self.feedback_fn("* checking disk consistency between source and target")
     for dev in instance.disks:
       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
@@ -7180,8 +7530,12 @@ class TLMigrateInstance(Tasklet):
       self.feedback_fn("* checking disk consistency between source and target")
       for dev in instance.disks:
         # for drbd, these are drbd over lvm
-        if not _CheckDiskConsistency(self, dev, target_node, False):
-          if not self.ignore_consistency:
+        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
+          if primary_node.offline:
+            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
+                             " target node %s" %
+                             (primary_node.name, dev.iv_name, target_node))
+          elif not self.ignore_consistency:
             raise errors.OpExecError("Disk %s is degraded on target node,"
                                      " aborting failover" % dev.iv_name)
     else:
@@ -7207,8 +7561,8 @@ class TLMigrateInstance(Tasklet):
                                  (instance.name, source_node, msg))
 
     self.feedback_fn("* deactivating the instance's disks on source node")
-    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
-      raise errors.OpExecError("Can't shut down the instance's disks.")
+    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
+      raise errors.OpExecError("Can't shut down the instance's disks")
 
     instance.primary_node = target_node
     # distribute new instance config to the other nodes
@@ -7216,21 +7570,24 @@ class TLMigrateInstance(Tasklet):
 
     # Only start the instance if it's marked as up
     if instance.admin_up:
-      self.feedback_fn("* activating the instance's disks on target node")
+      self.feedback_fn("* activating the instance's disks on target node %s" %
+                       target_node)
       logging.info("Starting instance %s on node %s",
                    instance.name, target_node)
 
-      disks_ok, _ = _AssembleInstanceDisks(self, instance,
+      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                            ignore_secondaries=True)
       if not disks_ok:
-        _ShutdownInstanceDisks(self, instance)
+        _ShutdownInstanceDisks(self.lu, instance)
         raise errors.OpExecError("Can't activate the instance's disks")
 
-      self.feedback_fn("* starting the instance on the target node")
-      result = self.rpc.call_instance_start(target_node, instance, None, None)
+      self.feedback_fn("* starting the instance on the target node %s" %
+                       target_node)
+      result = self.rpc.call_instance_start(target_node, instance, None, None,
+                                            False)
       msg = result.fail_msg
       if msg:
-        _ShutdownInstanceDisks(self, instance)
+        _ShutdownInstanceDisks(self.lu, instance)
         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                  (instance.name, target_node, msg))
 
@@ -7248,10 +7605,8 @@ class TLMigrateInstance(Tasklet):
       # directly, or through an iallocator.
 
     self.all_nodes = [self.source_node, self.target_node]
-    self.nodes_ip = {
-      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
-      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
-      }
+    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
+                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
 
     if self.failover:
       feedback_fn("Failover instance %s" % self.instance.name)
@@ -7591,7 +7946,7 @@ def _CreateDisks(lu, instance, to_skip=None, target_node=None):
     pnode = target_node
     all_nodes = [pnode]
 
-  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
+  if instance.disk_template in constants.DTS_FILEBASED:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
 
@@ -7687,7 +8042,7 @@ def _ComputeDiskSizePerVG(disk_template, disks):
 
   if disk_template not in req_size_dict:
     raise errors.ProgrammerError("Disk template '%s' size requirement"
-                                 " is unknown" %  disk_template)
+                                 " is unknown" % disk_template)
 
   return req_size_dict[disk_template]
 
@@ -7709,7 +8064,7 @@ def _ComputeDiskSize(disk_template, disks):
 
   if disk_template not in req_size_dict:
     raise errors.ProgrammerError("Disk template '%s' size requirement"
-                                 " is unknown" %  disk_template)
+                                 " is unknown" % disk_template)
 
   return req_size_dict[disk_template]
 
@@ -7864,9 +8219,10 @@ class LUInstanceCreate(LogicalUnit):
       raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                  self.op.file_driver, errors.ECODE_INVAL)
 
-    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
-      raise errors.OpPrereqError("File storage directory path not absolute",
-                                 errors.ECODE_INVAL)
+    if self.op.disk_template == constants.DT_FILE:
+      opcodes.RequireFileStorage()
+    elif self.op.disk_template == constants.DT_SHARED_FILE:
+      opcodes.RequireSharedFileStorage()
 
     ### Node/iallocator related checks
     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
@@ -7988,8 +8344,8 @@ class LUInstanceCreate(LogicalUnit):
         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
         self.op.src_node = None
         if os.path.isabs(src_path):
-          raise errors.OpPrereqError("Importing an instance from an absolute"
-                                     " path requires a source node option",
+          raise errors.OpPrereqError("Importing an instance from a path"
+                                     " requires a source node option",
                                      errors.ECODE_INVAL)
       else:
         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
@@ -8092,7 +8448,7 @@ class LUInstanceCreate(LogicalUnit):
     src_path = self.op.src_path
 
     if src_node is None:
-      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
+      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
       exp_list = self.rpc.call_export_list(locked_nodes)
       found = False
       for node in exp_list:
@@ -8167,9 +8523,13 @@ class LUInstanceCreate(LogicalUnit):
         nics.append(ndict)
       self.op.nics = nics
 
+    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
+      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
+
     if (self.op.hypervisor is None and
         einfo.has_option(constants.INISECT_INS, "hypervisor")):
       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
+
     if einfo.has_section(constants.INISECT_HYP):
       # use the export parameters but do not override the ones
       # specified by the user
@@ -8221,10 +8581,40 @@ class LUInstanceCreate(LogicalUnit):
       if name in os_defs and os_defs[name] == self.op.osparams[name]:
         del self.op.osparams[name]
 
+  def _CalculateFileStorageDir(self):
+    """Calculate final instance file storage dir.
+
+    """
+    # file storage dir calculation/check
+    self.instance_file_storage_dir = None
+    if self.op.disk_template in constants.DTS_FILEBASED:
+      # build the full file storage dir path
+      joinargs = []
+
+      if self.op.disk_template == constants.DT_SHARED_FILE:
+        get_fsd_fn = self.cfg.GetSharedFileStorageDir
+      else:
+        get_fsd_fn = self.cfg.GetFileStorageDir
+
+      cfg_storagedir = get_fsd_fn()
+      if not cfg_storagedir:
+        raise errors.OpPrereqError("Cluster file storage dir not defined")
+      joinargs.append(cfg_storagedir)
+
+      if self.op.file_storage_dir is not None:
+        joinargs.append(self.op.file_storage_dir)
+
+      joinargs.append(self.op.instance_name)
+
+      # pylint: disable=W0142
+      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
+
   def CheckPrereq(self):
     """Check prerequisites.
 
     """
+    self._CalculateFileStorageDir()
+
     if self.op.mode == constants.INSTANCE_IMPORT:
       export_info = self._ReadExportInfo()
       self._ReadExportParams(export_info)
@@ -8371,7 +8761,7 @@ class LUInstanceCreate(LogicalUnit):
 
       disk_images = []
       for idx in range(export_disks):
-        option = 'disk%d_dump' % idx
+        option = "disk%d_dump" % idx
         if export_info.has_option(constants.INISECT_INS, option):
           # FIXME: are the old os-es, disk sizes, etc. useful?
           export_name = export_info.get(constants.INISECT_INS, option)
@@ -8382,9 +8772,9 @@ class LUInstanceCreate(LogicalUnit):
 
       self.src_images = disk_images
 
-      old_name = export_info.get(constants.INISECT_INS, 'name')
+      old_name = export_info.get(constants.INISECT_INS, "name")
       try:
-        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
+        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
       except (TypeError, ValueError), err:
         raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                    " an integer: %s" % str(err),
@@ -8392,7 +8782,7 @@ class LUInstanceCreate(LogicalUnit):
       if self.op.instance_name == old_name:
         for idx, nic in enumerate(self.nics):
           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
-            nic_mac_ini = 'nic%d_mac' % idx
+            nic_mac_ini = "nic%d_mac" % idx
             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
 
     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
@@ -8556,30 +8946,12 @@ class LUInstanceCreate(LogicalUnit):
     else:
       network_port = None
 
-    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
-      # this is needed because os.path.join does not accept None arguments
-      if self.op.file_storage_dir is None:
-        string_file_storage_dir = ""
-      else:
-        string_file_storage_dir = self.op.file_storage_dir
-
-      # build the full file storage dir path
-      if self.op.disk_template == constants.DT_SHARED_FILE:
-        get_fsd_fn = self.cfg.GetSharedFileStorageDir
-      else:
-        get_fsd_fn = self.cfg.GetFileStorageDir
-
-      file_storage_dir = utils.PathJoin(get_fsd_fn(),
-                                        string_file_storage_dir, instance)
-    else:
-      file_storage_dir = ""
-
     disks = _GenerateDiskTemplate(self,
                                   self.op.disk_template,
                                   instance, pnode_name,
                                   self.secondaries,
                                   self.disks,
-                                  file_storage_dir,
+                                  self.instance_file_storage_dir,
                                   self.op.file_driver,
                                   0,
                                   feedback_fn)
@@ -8594,16 +8966,19 @@ class LUInstanceCreate(LogicalUnit):
                             hvparams=self.op.hvparams,
                             hypervisor=self.op.hypervisor,
                             osparams=self.op.osparams,
-                            tags=self.op.tags,
                             )
 
+    if self.op.tags:
+      for tag in self.op.tags:
+        iobj.AddTag(tag)
+
     if self.adopt_disks:
       if self.op.disk_template == constants.DT_PLAIN:
         # rename LVs to the newly-generated names; we need to construct
         # 'fake' LV disks with the old data, plus the new unique_id
         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
         rename_to = []
-        for t_dsk, a_dsk in zip (tmp_disks, self.disks):
+        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
           rename_to.append(t_dsk.logical_id)
           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
           self.cfg.SetDiskID(t_dsk, pnode_name)
@@ -8654,7 +9029,6 @@ class LUInstanceCreate(LogicalUnit):
       disk_abort = not _WaitForSync(self, iobj)
     elif iobj.disk_template in constants.DTS_INT_MIRROR:
       # make sure the disks are not degraded (still sync-ing is ok)
-      time.sleep(15)
       feedback_fn("* checking mirrors status")
       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
     else:
@@ -8671,12 +9045,33 @@ class LUInstanceCreate(LogicalUnit):
     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
       if self.op.mode == constants.INSTANCE_CREATE:
         if not self.op.no_install:
+          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
+                        not self.op.wait_for_sync)
+          if pause_sync:
+            feedback_fn("* pausing disk sync to install instance OS")
+            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
+                                                              iobj.disks, True)
+            for idx, success in enumerate(result.payload):
+              if not success:
+                logging.warn("pause-sync of instance %s for disk %d failed",
+                             instance, idx)
+
           feedback_fn("* running the instance OS create scripts...")
           # FIXME: pass debug option from opcode to backend
-          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
-                                                 self.op.debug_level)
-          result.Raise("Could not add os for instance %s"
-                       " on node %s" % (instance, pnode_name))
+          os_add_result = \
+            self.rpc.call_instance_os_add(pnode_name, iobj, False,
+                                          self.op.debug_level)
+          if pause_sync:
+            feedback_fn("* resuming disk sync")
+            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
+                                                              iobj.disks, False)
+            for idx, success in enumerate(result.payload):
+              if not success:
+                logging.warn("resume-sync of instance %s for disk %d failed",
+                             instance, idx)
+
+          os_add_result.Raise("Could not add os for instance %s"
+                              " on node %s" % (instance, pnode_name))
 
       elif self.op.mode == constants.INSTANCE_IMPORT:
         feedback_fn("* running the instance OS import scripts...")
@@ -8744,7 +9139,8 @@ class LUInstanceCreate(LogicalUnit):
       self.cfg.Update(iobj, feedback_fn)
       logging.info("Starting instance %s on node %s", instance, pnode_name)
       feedback_fn("* starting instance...")
-      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
+      result = self.rpc.call_instance_start(pnode_name, iobj,
+                                            None, None, False)
       result.Raise("Could not start instance")
 
     return list(iobj.all_nodes)
@@ -8880,7 +9276,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
 
         # Lock member nodes of all locked groups
         self.needed_locks[locking.LEVEL_NODE] = [node_name
-          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
+          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
       else:
         self._LockInstancesNodes()
@@ -8920,16 +9316,9 @@ class LUInstanceReplaceDisks(LogicalUnit):
     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
             self.op.iallocator is None)
 
-    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
+    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
     if owned_groups:
-      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
-      if owned_groups != groups:
-        raise errors.OpExecError("Node groups used by instance '%s' changed"
-                                 " since lock was acquired, current list is %r,"
-                                 " used to be '%s'" %
-                                 (self.op.instance_name,
-                                  utils.CommaJoin(groups),
-                                  utils.CommaJoin(owned_groups)))
+      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
 
     return LogicalUnit.CheckPrereq(self)
 
@@ -8994,7 +9383,7 @@ class TLReplaceDisks(Tasklet):
     ial = IAllocator(lu.cfg, lu.rpc,
                      mode=constants.IALLOCATOR_MODE_RELOC,
                      name=instance_name,
-                     relocate_from=relocate_from)
+                     relocate_from=list(relocate_from))
 
     ial.Run(iallocator_name)
 
@@ -9018,6 +9407,9 @@ class TLReplaceDisks(Tasklet):
     return remote_node_name
 
   def _FindFaultyDisks(self, node_name):
+    """Wrapper for L{_FindFaultyInstanceDisks}.
+
+    """
     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                     node_name, True)
 
@@ -9088,7 +9480,7 @@ class TLReplaceDisks(Tasklet):
     if remote_node is None:
       self.remote_node_info = None
     else:
-      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
+      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
              "Remote node '%s' is not locked" % remote_node
 
       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
@@ -9194,9 +9586,8 @@ class TLReplaceDisks(Tasklet):
       instance.FindDisk(disk_idx)
 
     # Get secondary node IP addresses
-    self.node_secondary_ip = \
-      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
-           for node_name in touched_nodes)
+    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
+                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
 
   def Exec(self, feedback_fn):
     """Execute disk replacement.
@@ -9209,13 +9600,13 @@ class TLReplaceDisks(Tasklet):
 
     if __debug__:
       # Verify owned locks before starting operation
-      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
-      assert set(owned_locks) == set(self.node_secondary_ip), \
+      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
+      assert set(owned_nodes) == set(self.node_secondary_ip), \
           ("Incorrect node locks, owning %s, expected %s" %
-           (owned_locks, self.node_secondary_ip.keys()))
+           (owned_nodes, self.node_secondary_ip.keys()))
 
-      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
-      assert list(owned_locks) == [self.instance_name], \
+      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
+      assert list(owned_instances) == [self.instance_name], \
           "Instance '%s' not locked" % self.instance_name
 
       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
@@ -9250,12 +9641,12 @@ class TLReplaceDisks(Tasklet):
 
     if __debug__:
       # Verify owned locks
-      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
+      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
       nodes = frozenset(self.node_secondary_ip)
-      assert ((self.early_release and not owned_locks) or
-              (not self.early_release and not (set(owned_locks) - nodes))), \
+      assert ((self.early_release and not owned_nodes) or
+              (not self.early_release and not (set(owned_nodes) - nodes))), \
         ("Not owning the correct locks, early_release=%s, owned=%r,"
-         " nodes=%r" % (self.early_release, owned_locks, nodes))
+         " nodes=%r" % (self.early_release, owned_nodes, nodes))
 
     return result
 
@@ -9310,6 +9701,12 @@ class TLReplaceDisks(Tasklet):
                                  (node_name, self.instance.name))
 
   def _CreateNewStorage(self, node_name):
+    """Create new storage on the primary or secondary node.
+
+    This is only used for same-node replaces, not for changing the
+    secondary node, hence we don't want to modify the existing disk.
+
+    """
     iv_names = {}
 
     for idx, dev in enumerate(self.instance.disks):
@@ -9331,7 +9728,7 @@ class TLReplaceDisks(Tasklet):
                              logical_id=(vg_meta, names[1]))
 
       new_lvs = [lv_data, lv_meta]
-      old_lvs = dev.children
+      old_lvs = [child.Copy() for child in dev.children]
       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
 
       # we pass force_create=True to force the LVM creation
@@ -9369,7 +9766,7 @@ class TLReplaceDisks(Tasklet):
           self.lu.LogWarning("Can't remove old LV: %s" % msg,
                              hint="remove unused LVs manually")
 
-  def _ExecDrbd8DiskOnly(self, feedback_fn):
+  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
     """Replace a disk on the primary or secondary for DRBD 8.
 
     The algorithm for replace is quite complicated:
@@ -9452,10 +9849,14 @@ class TLReplaceDisks(Tasklet):
                                              rename_new_to_old)
       result.Raise("Can't rename new LVs on node %s" % self.target_node)
 
+      # Intermediate steps of in memory modifications
       for old, new in zip(old_lvs, new_lvs):
         new.logical_id = old.logical_id
         self.cfg.SetDiskID(new, self.target_node)
 
+      # We need to modify old_lvs so that removal later removes the
+      # right LVs, not the newly added ones; note that old_lvs is a
+      # copy here
       for disk in old_lvs:
         disk.logical_id = ren_fn(disk, temp_suffix)
         self.cfg.SetDiskID(disk, self.target_node)
@@ -9475,10 +9876,6 @@ class TLReplaceDisks(Tasklet):
                                      "volumes"))
         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
 
-      dev.children = new_lvs
-
-      self.cfg.Update(self.instance, feedback_fn)
-
     cstep = 5
     if self.early_release:
       self.lu.LogStep(cstep, steps_total, "Removing old storage")
@@ -9526,6 +9923,8 @@ class TLReplaceDisks(Tasklet):
     """
     steps_total = 6
 
+    pnode = self.instance.primary_node
+
     # Step: check device activation
     self.lu.LogStep(1, steps_total, "Check device existence")
     self._CheckDisksExistence([self.instance.primary_node])
@@ -9600,10 +9999,8 @@ class TLReplaceDisks(Tasklet):
                                  " soon as possible"))
 
     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
-    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
-                                               self.node_secondary_ip,
-                                               self.instance.disks)\
-                                              [self.instance.primary_node]
+    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
+                                               self.instance.disks)[pnode]
 
     msg = result.fail_msg
     if msg:
@@ -9727,8 +10124,8 @@ class LURepairNodeStorage(NoHooksLU):
                  (self.op.name, self.op.node_name))
 
 
-class LUNodeEvacStrategy(NoHooksLU):
-  """Computes the node evacuation strategy.
+class LUNodeEvacuate(NoHooksLU):
+  """Evacuates instances off a list of nodes.
 
   """
   REQ_BGL = False
@@ -9737,41 +10134,213 @@ class LUNodeEvacStrategy(NoHooksLU):
     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
 
   def ExpandNames(self):
-    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
-    self.needed_locks = locks = {}
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+
+    if self.op.remote_node is not None:
+      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
+      assert self.op.remote_node
+
+      if self.op.remote_node == self.op.node_name:
+        raise errors.OpPrereqError("Can not use evacuated node as a new"
+                                   " secondary node", errors.ECODE_INVAL)
+
+      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
+        raise errors.OpPrereqError("Without the use of an iallocator only"
+                                   " secondary instances can be evacuated",
+                                   errors.ECODE_INVAL)
+
+    # Declare locks
+    self.share_locks = _ShareAll()
+    self.needed_locks = {
+      locking.LEVEL_INSTANCE: [],
+      locking.LEVEL_NODEGROUP: [],
+      locking.LEVEL_NODE: [],
+      }
+
     if self.op.remote_node is None:
-      locks[locking.LEVEL_NODE] = locking.ALL_SET
+      # Iallocator will choose any node(s) in the same group
+      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
     else:
-      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
-      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
+      group_nodes = frozenset([self.op.remote_node])
 
-  def Exec(self, feedback_fn):
-    instances = []
-    for node in self.op.nodes:
-      instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
-    if not instances:
-      return []
+    # Determine nodes to be locked
+    self.lock_nodes = set([self.op.node_name]) | group_nodes
+
+  def _DetermineInstances(self):
+    """Builds list of instances to operate on.
+
+    """
+    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
+
+    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
+      # Primary instances only
+      inst_fn = _GetNodePrimaryInstances
+      assert self.op.remote_node is None, \
+        "Evacuating primary instances requires iallocator"
+    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
+      # Secondary instances only
+      inst_fn = _GetNodeSecondaryInstances
+    else:
+      # All instances
+      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
+      inst_fn = _GetNodeInstances
+
+    return inst_fn(self.cfg, self.op.node_name)
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_INSTANCE:
+      # Lock instances optimistically, needs verification once node and group
+      # locks have been acquired
+      self.needed_locks[locking.LEVEL_INSTANCE] = \
+        set(i.name for i in self._DetermineInstances())
+
+    elif level == locking.LEVEL_NODEGROUP:
+      # Lock node groups optimistically, needs verification once nodes have
+      # been acquired
+      self.needed_locks[locking.LEVEL_NODEGROUP] = \
+        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
+
+    elif level == locking.LEVEL_NODE:
+      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
+
+  def CheckPrereq(self):
+    # Verify locks
+    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
+    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
+    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
+
+    assert owned_nodes == self.lock_nodes
+
+    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
+    if owned_groups != wanted_groups:
+      raise errors.OpExecError("Node groups changed since locks were acquired,"
+                               " current groups are '%s', used to be '%s'" %
+                               (utils.CommaJoin(wanted_groups),
+                                utils.CommaJoin(owned_groups)))
+
+    # Determine affected instances
+    self.instances = self._DetermineInstances()
+    self.instance_names = [i.name for i in self.instances]
+
+    if set(self.instance_names) != owned_instances:
+      raise errors.OpExecError("Instances on node '%s' changed since locks"
+                               " were acquired, current instances are '%s',"
+                               " used to be '%s'" %
+                               (self.op.node_name,
+                                utils.CommaJoin(self.instance_names),
+                                utils.CommaJoin(owned_instances)))
+
+    if self.instance_names:
+      self.LogInfo("Evacuating instances from node '%s': %s",
+                   self.op.node_name,
+                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
+    else:
+      self.LogInfo("No instances to evacuate from node '%s'",
+                   self.op.node_name)
 
     if self.op.remote_node is not None:
-      result = []
-      for i in instances:
+      for i in self.instances:
         if i.primary_node == self.op.remote_node:
           raise errors.OpPrereqError("Node %s is the primary node of"
                                      " instance %s, cannot use it as"
                                      " secondary" %
                                      (self.op.remote_node, i.name),
                                      errors.ECODE_INVAL)
-        result.append([i.name, self.op.remote_node])
-    else:
-      ial = IAllocator(self.cfg, self.rpc,
-                       mode=constants.IALLOCATOR_MODE_MEVAC,
-                       evac_nodes=self.op.nodes)
-      ial.Run(self.op.iallocator, validate=True)
+
+  def Exec(self, feedback_fn):
+    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
+
+    if not self.instance_names:
+      # No instances to evacuate
+      jobs = []
+
+    elif self.op.iallocator is not None:
+      # TODO: Implement relocation to other group
+      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
+                       evac_mode=self.op.mode,
+                       instances=list(self.instance_names))
+
+      ial.Run(self.op.iallocator)
+
       if not ial.success:
-        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
-                                 errors.ECODE_NORES)
-      result = ial.result
-    return result
+        raise errors.OpPrereqError("Can't compute node evacuation using"
+                                   " iallocator '%s': %s" %
+                                   (self.op.iallocator, ial.info),
+                                   errors.ECODE_NORES)
+
+      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
+
+    elif self.op.remote_node is not None:
+      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
+      jobs = [
+        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
+                                        remote_node=self.op.remote_node,
+                                        disks=[],
+                                        mode=constants.REPLACE_DISK_CHG,
+                                        early_release=self.op.early_release)]
+        for instance_name in self.instance_names
+        ]
+
+    else:
+      raise errors.ProgrammerError("No iallocator or remote node")
+
+    return ResultWithJobs(jobs)
+
+
+def _SetOpEarlyRelease(early_release, op):
+  """Sets C{early_release} flag on opcodes if available.
+
+  """
+  try:
+    op.early_release = early_release
+  except AttributeError:
+    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
+
+  return op
+
+
+def _NodeEvacDest(use_nodes, group, nodes):
+  """Returns group or nodes depending on caller's choice.
+
+  """
+  if use_nodes:
+    return utils.CommaJoin(nodes)
+  else:
+    return group
+
+
+def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
+  """Unpacks the result of change-group and node-evacuate iallocator requests.
+
+  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
+  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
+
+  @type lu: L{LogicalUnit}
+  @param lu: Logical unit instance
+  @type alloc_result: tuple/list
+  @param alloc_result: Result from iallocator
+  @type early_release: bool
+  @param early_release: Whether to release locks early if possible
+  @type use_nodes: bool
+  @param use_nodes: Whether to display node names instead of groups
+
+  """
+  (moved, failed, jobs) = alloc_result
+
+  if failed:
+    lu.LogWarning("Unable to evacuate instances %s",
+                  utils.CommaJoin("%s (%s)" % (name, reason)
+                                  for (name, reason) in failed))
+
+  if moved:
+    lu.LogInfo("Instances to be moved: %s",
+               utils.CommaJoin("%s (to %s)" %
+                               (name, _NodeEvacDest(use_nodes, group, nodes))
+                               for (name, group, nodes) in moved))
+
+  return [map(compat.partial(_SetOpEarlyRelease, early_release),
+              map(opcodes.OpCode.LoadOpCode, ops))
+          for ops in jobs]
 
 
 class LUInstanceGrowDisk(LogicalUnit):
@@ -9907,7 +10476,7 @@ class LUInstanceQueryData(NoHooksLU):
       self.wanted_names = None
 
     if self.op.use_locking:
-      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+      self.share_locks = _ShareAll()
 
       if self.wanted_names is None:
         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
@@ -9915,7 +10484,6 @@ class LUInstanceQueryData(NoHooksLU):
         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
 
       self.needed_locks[locking.LEVEL_NODE] = []
-      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
 
   def DeclareLocks(self, level):
@@ -9930,10 +10498,10 @@ class LUInstanceQueryData(NoHooksLU):
     """
     if self.wanted_names is None:
       assert self.op.use_locking, "Locking was not used"
-      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
+      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
 
-    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
-                             for name in self.wanted_names]
+    self.wanted_instances = \
+        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
 
   def _ComputeBlockdevStatus(self, node, instance_name, dev):
     """Returns the status of a block device
@@ -9974,8 +10542,9 @@ class LUInstanceQueryData(NoHooksLU):
     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
 
     if dev.children:
-      dev_children = [self._ComputeDiskStatus(instance, snode, child)
-                      for child in dev.children]
+      dev_children = map(compat.partial(self._ComputeDiskStatus,
+                                        instance, snode),
+                         dev.children)
     else:
       dev_children = []
 
@@ -9997,8 +10566,16 @@ class LUInstanceQueryData(NoHooksLU):
 
     cluster = self.cfg.GetClusterInfo()
 
-    for instance in self.wanted_instances:
-      if not self.op.static:
+    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
+                                          for i in self.wanted_instances)
+    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
+      if self.op.static or pnode.offline:
+        remote_state = None
+        if pnode.offline:
+          self.LogWarning("Primary node %s is marked offline, returning static"
+                          " information only for instance %s" %
+                          (pnode.name, instance.name))
+      else:
         remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                   instance.name,
                                                   instance.hypervisor)
@@ -10008,15 +10585,14 @@ class LUInstanceQueryData(NoHooksLU):
           remote_state = "up"
         else:
           remote_state = "down"
-      else:
-        remote_state = None
+
       if instance.admin_up:
         config_state = "up"
       else:
         config_state = "down"
 
-      disks = [self._ComputeDiskStatus(instance, None, device)
-               for device in instance.disks]
+      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
+                  instance.disks)
 
       result[instance.name] = {
         "name": instance.name,
@@ -10141,13 +10717,13 @@ class LUInstanceSetParams(LogicalUnit):
             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                        errors.ECODE_INVAL)
 
-      nic_bridge = nic_dict.get('bridge', None)
+      nic_bridge = nic_dict.get("bridge", None)
       nic_link = nic_dict.get(constants.INIC_LINK, None)
       if nic_bridge and nic_link:
         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                    " at the same time", errors.ECODE_INVAL)
       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
-        nic_dict['bridge'] = None
+        nic_dict["bridge"] = None
       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
         nic_dict[constants.INIC_LINK] = None
 
@@ -10190,13 +10766,13 @@ class LUInstanceSetParams(LogicalUnit):
     """
     args = dict()
     if constants.BE_MEMORY in self.be_new:
-      args['memory'] = self.be_new[constants.BE_MEMORY]
+      args["memory"] = self.be_new[constants.BE_MEMORY]
     if constants.BE_VCPUS in self.be_new:
-      args['vcpus'] = self.be_new[constants.BE_VCPUS]
+      args["vcpus"] = self.be_new[constants.BE_VCPUS]
     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
     # information at all.
     if self.op.nics:
-      args['nics'] = []
+      args["nics"] = []
       nic_override = dict(self.op.nics)
       for idx, nic in enumerate(self.instance.nics):
         if idx in nic_override:
@@ -10217,16 +10793,16 @@ class LUInstanceSetParams(LogicalUnit):
           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
         mode = nicparams[constants.NIC_MODE]
         link = nicparams[constants.NIC_LINK]
-        args['nics'].append((ip, mac, mode, link))
+        args["nics"].append((ip, mac, mode, link))
       if constants.DDM_ADD in nic_override:
         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
         nicparams = self.nic_pnew[constants.DDM_ADD]
         mode = nicparams[constants.NIC_MODE]
         link = nicparams[constants.NIC_LINK]
-        args['nics'].append((ip, mac, mode, link))
+        args["nics"].append((ip, mac, mode, link))
       elif constants.DDM_REMOVE in nic_override:
-        del args['nics'][-1]
+        del args["nics"][-1]
 
     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
     if self.op.disk_template:
@@ -10343,8 +10919,8 @@ class LUInstanceSetParams(LogicalUnit):
       if msg:
         # Assume the primary node is unreachable and go ahead
         self.warn.append("Can't get info from primary node %s: %s" %
-                         (pnode,  msg))
-      elif not isinstance(pninfo.payload.get('memory_free', None), int):
+                         (pnode, msg))
+      elif not isinstance(pninfo.payload.get("memory_free", None), int):
         self.warn.append("Node data from primary node %s doesn't contain"
                          " free memory information" % pnode)
       elif instance_info.fail_msg:
@@ -10352,14 +10928,14 @@ class LUInstanceSetParams(LogicalUnit):
                         instance_info.fail_msg)
       else:
         if instance_info.payload:
-          current_mem = int(instance_info.payload['memory'])
+          current_mem = int(instance_info.payload["memory"])
         else:
           # Assume instance not running
           # (there is a slight race condition here, but it's not very probable,
           # and we have no other way to check)
           current_mem = 0
         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
-                    pninfo.payload['memory_free'])
+                    pninfo.payload["memory_free"])
         if miss_mem > 0:
           raise errors.OpPrereqError("This change will prevent the instance"
                                      " from starting, due to %d MB of memory"
@@ -10372,11 +10948,11 @@ class LUInstanceSetParams(LogicalUnit):
             continue
           nres.Raise("Can't get info from secondary node %s" % node,
                      prereq=True, ecode=errors.ECODE_STATE)
-          if not isinstance(nres.payload.get('memory_free', None), int):
+          if not isinstance(nres.payload.get("memory_free", None), int):
             raise errors.OpPrereqError("Secondary node %s didn't return free"
                                        " memory information" % node,
                                        errors.ECODE_STATE)
-          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
+          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
             raise errors.OpPrereqError("This change will prevent the instance"
                                        " from failover to its secondary node"
                                        " %s, due to not enough memory" % node,
@@ -10412,8 +10988,8 @@ class LUInstanceSetParams(LogicalUnit):
                                  for key in constants.NICS_PARAMETERS
                                  if key in nic_dict])
 
-      if 'bridge' in nic_dict:
-        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
+      if "bridge" in nic_dict:
+        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
 
       new_nic_params = _GetUpdatedParams(old_nic_params,
                                          update_params_dict)
@@ -10439,12 +11015,12 @@ class LUInstanceSetParams(LogicalUnit):
         else:
           nic_ip = old_nic_ip
         if nic_ip is None:
-          raise errors.OpPrereqError('Cannot set the nic ip to None'
-                                     ' on a routed nic', errors.ECODE_INVAL)
+          raise errors.OpPrereqError("Cannot set the nic ip to None"
+                                     " on a routed nic", errors.ECODE_INVAL)
       if constants.INIC_MAC in nic_dict:
         nic_mac = nic_dict[constants.INIC_MAC]
         if nic_mac is None:
-          raise errors.OpPrereqError('Cannot set the nic mac to None',
+          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                      errors.ECODE_INVAL)
         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
           # otherwise generate the mac
@@ -10718,6 +11294,147 @@ class LUInstanceSetParams(LogicalUnit):
     }
 
 
+class LUInstanceChangeGroup(LogicalUnit):
+  HPATH = "instance-change-group"
+  HTYPE = constants.HTYPE_INSTANCE
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    self.share_locks = _ShareAll()
+    self.needed_locks = {
+      locking.LEVEL_NODEGROUP: [],
+      locking.LEVEL_NODE: [],
+      }
+
+    self._ExpandAndLockInstance()
+
+    if self.op.target_groups:
+      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
+                                  self.op.target_groups)
+    else:
+      self.req_target_uuids = None
+
+    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODEGROUP:
+      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
+
+      if self.req_target_uuids:
+        lock_groups = set(self.req_target_uuids)
+
+        # Lock all groups used by instance optimistically; this requires going
+        # via the node before it's locked, requiring verification later on
+        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
+        lock_groups.update(instance_groups)
+      else:
+        # No target groups, need to lock all of them
+        lock_groups = locking.ALL_SET
+
+      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
+
+    elif level == locking.LEVEL_NODE:
+      if self.req_target_uuids:
+        # Lock all nodes used by instances
+        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+        self._LockInstancesNodes()
+
+        # Lock all nodes in all potential target groups
+        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
+                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
+        member_nodes = [node_name
+                        for group in lock_groups
+                        for node_name in self.cfg.GetNodeGroup(group).members]
+        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
+      else:
+        # Lock all nodes as all groups are potential targets
+        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+
+  def CheckPrereq(self):
+    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
+
+    assert (self.req_target_uuids is None or
+            owned_groups.issuperset(self.req_target_uuids))
+    assert owned_instances == set([self.op.instance_name])
+
+    # Get instance information
+    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+
+    # Check if node groups for locked instance are still correct
+    assert owned_nodes.issuperset(self.instance.all_nodes), \
+      ("Instance %s's nodes changed while we kept the lock" %
+       self.op.instance_name)
+
+    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
+                                           owned_groups)
+
+    if self.req_target_uuids:
+      # User requested specific target groups
+      self.target_uuids = self.req_target_uuids
+    else:
+      # All groups except those used by the instance are potential targets
+      self.target_uuids = owned_groups - inst_groups
+
+    conflicting_groups = self.target_uuids & inst_groups
+    if conflicting_groups:
+      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
+                                 " used by the instance '%s'" %
+                                 (utils.CommaJoin(conflicting_groups),
+                                  self.op.instance_name),
+                                 errors.ECODE_INVAL)
+
+    if not self.target_uuids:
+      raise errors.OpPrereqError("There are no possible target groups",
+                                 errors.ECODE_INVAL)
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    """
+    assert self.target_uuids
+
+    env = {
+      "TARGET_GROUPS": " ".join(self.target_uuids),
+      }
+
+    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
+
+    return env
+
+  def BuildHooksNodes(self):
+    """Build hooks nodes.
+
+    """
+    mn = self.cfg.GetMasterNode()
+    return ([mn], [mn])
+
+  def Exec(self, feedback_fn):
+    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
+
+    assert instances == [self.op.instance_name], "Instance not locked"
+
+    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
+                     instances=instances, target_groups=list(self.target_uuids))
+
+    ial.Run(self.op.iallocator)
+
+    if not ial.success:
+      raise errors.OpPrereqError("Can't compute solution for changing group of"
+                                 " instance '%s' using iallocator '%s': %s" %
+                                 (self.op.instance_name, self.op.iallocator,
+                                  ial.info),
+                                 errors.ECODE_NORES)
+
+    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
+
+    self.LogInfo("Iallocator returned %s job(s) for changing group of"
+                 " instance '%s'", len(jobs), self.op.instance_name)
+
+    return ResultWithJobs(jobs)
+
+
 class LUBackupQuery(NoHooksLU):
   """Query the exports list
 
@@ -10742,7 +11459,7 @@ class LUBackupQuery(NoHooksLU):
         that node.
 
     """
-    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
+    self.nodes = self.owned_locks(locking.LEVEL_NODE)
     rpcresult = self.rpc.call_export_list(self.nodes)
     result = {}
     for node in rpcresult:
@@ -11038,7 +11755,8 @@ class LUBackupExport(LogicalUnit):
             not self.op.remove_instance):
           assert not activate_disks
           feedback_fn("Starting instance %s" % instance.name)
-          result = self.rpc.call_instance_start(src_node, instance, None, None)
+          result = self.rpc.call_instance_start(src_node, instance,
+                                                None, None, False)
           msg = result.fail_msg
           if msg:
             feedback_fn("Failed to start instance: %s" % msg)
@@ -11124,7 +11842,7 @@ class LUBackupRemove(NoHooksLU):
       fqdn_warn = True
       instance_name = self.op.instance_name
 
-    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
+    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
     exportlist = self.rpc.call_export_list(locked_nodes)
     found = False
     for node in exportlist:
@@ -11244,12 +11962,12 @@ class LUGroupAssignNodes(NoHooksLU):
 
     """
     assert self.needed_locks[locking.LEVEL_NODEGROUP]
-    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
+    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
             frozenset(self.op.nodes))
 
     expected_locks = (set([self.group_uuid]) |
                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
-    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
+    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
     if actual_locks != expected_locks:
       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                                " current groups are '%s', used to be '%s'" %
@@ -11437,6 +12155,9 @@ class LUGroupQuery(NoHooksLU):
   def ExpandNames(self):
     self.gq.ExpandNames(self)
 
+  def DeclareLocks(self, level):
+    self.gq.DeclareLocks(self, level)
+
   def Exec(self, feedback_fn):
     return self.gq.OldStyleQuery(self)
 
@@ -11515,7 +12236,6 @@ class LUGroupSetParams(LogicalUnit):
     return result
 
 
-
 class LUGroupRemove(LogicalUnit):
   HPATH = "group-remove"
   HTYPE = constants.HTYPE_GROUP
@@ -11652,7 +12372,163 @@ class LUGroupRename(LogicalUnit):
     return self.op.new_name
 
 
-class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
+class LUGroupEvacuate(LogicalUnit):
+  HPATH = "group-evacuate"
+  HTYPE = constants.HTYPE_GROUP
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    # This raises errors.OpPrereqError on its own:
+    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
+
+    if self.op.target_groups:
+      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
+                                  self.op.target_groups)
+    else:
+      self.req_target_uuids = []
+
+    if self.group_uuid in self.req_target_uuids:
+      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
+                                 " as a target group (targets are %s)" %
+                                 (self.group_uuid,
+                                  utils.CommaJoin(self.req_target_uuids)),
+                                 errors.ECODE_INVAL)
+
+    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
+
+    self.share_locks = _ShareAll()
+    self.needed_locks = {
+      locking.LEVEL_INSTANCE: [],
+      locking.LEVEL_NODEGROUP: [],
+      locking.LEVEL_NODE: [],
+      }
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_INSTANCE:
+      assert not self.needed_locks[locking.LEVEL_INSTANCE]
+
+      # Lock instances optimistically, needs verification once node and group
+      # locks have been acquired
+      self.needed_locks[locking.LEVEL_INSTANCE] = \
+        self.cfg.GetNodeGroupInstances(self.group_uuid)
+
+    elif level == locking.LEVEL_NODEGROUP:
+      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
+
+      if self.req_target_uuids:
+        lock_groups = set([self.group_uuid] + self.req_target_uuids)
+
+        # Lock all groups used by instances optimistically; this requires going
+        # via the node before it's locked, requiring verification later on
+        lock_groups.update(group_uuid
+                           for instance_name in
+                             self.owned_locks(locking.LEVEL_INSTANCE)
+                           for group_uuid in
+                             self.cfg.GetInstanceNodeGroups(instance_name))
+      else:
+        # No target groups, need to lock all of them
+        lock_groups = locking.ALL_SET
+
+      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
+
+    elif level == locking.LEVEL_NODE:
+      # This will only lock the nodes in the group to be evacuated which
+      # contain actual instances
+      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+      self._LockInstancesNodes()
+
+      # Lock all nodes in group to be evacuated and target groups
+      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+      assert self.group_uuid in owned_groups
+      member_nodes = [node_name
+                      for group in owned_groups
+                      for node_name in self.cfg.GetNodeGroup(group).members]
+      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
+
+  def CheckPrereq(self):
+    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
+
+    assert owned_groups.issuperset(self.req_target_uuids)
+    assert self.group_uuid in owned_groups
+
+    # Check if locked instances are still correct
+    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
+    # Get instance information
+    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
+
+    # Check if node groups for locked instances are still correct
+    for instance_name in owned_instances:
+      inst = self.instances[instance_name]
+      assert owned_nodes.issuperset(inst.all_nodes), \
+        "Instance %s's nodes changed while we kept the lock" % instance_name
+
+      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
+                                             owned_groups)
+
+      assert self.group_uuid in inst_groups, \
+        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
+
+    if self.req_target_uuids:
+      # User requested specific target groups
+      self.target_uuids = self.req_target_uuids
+    else:
+      # All groups except the one to be evacuated are potential targets
+      self.target_uuids = [group_uuid for group_uuid in owned_groups
+                           if group_uuid != self.group_uuid]
+
+      if not self.target_uuids:
+        raise errors.OpPrereqError("There are no possible target groups",
+                                   errors.ECODE_INVAL)
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    """
+    return {
+      "GROUP_NAME": self.op.group_name,
+      "TARGET_GROUPS": " ".join(self.target_uuids),
+      }
+
+  def BuildHooksNodes(self):
+    """Build hooks nodes.
+
+    """
+    mn = self.cfg.GetMasterNode()
+
+    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
+
+    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
+
+    return (run_nodes, run_nodes)
+
+  def Exec(self, feedback_fn):
+    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
+
+    assert self.group_uuid not in self.target_uuids
+
+    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
+                     instances=instances, target_groups=self.target_uuids)
+
+    ial.Run(self.op.iallocator)
+
+    if not ial.success:
+      raise errors.OpPrereqError("Can't compute group evacuation using"
+                                 " iallocator '%s': %s" %
+                                 (self.op.iallocator, ial.info),
+                                 errors.ECODE_NORES)
+
+    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
+
+    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
+                 len(jobs), self.op.group_name)
+
+    return ResultWithJobs(jobs)
+
+
+class TagsLU(NoHooksLU): # pylint: disable=W0223
   """Generic tags LU.
 
   This is an abstract class which is the parent of all the other tags LUs.
@@ -11700,7 +12576,7 @@ class LUTagsGet(TagsLU):
     TagsLU.ExpandNames(self)
 
     # Share locks as this is only a read operation
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+    self.share_locks = _ShareAll()
 
   def Exec(self, feedback_fn):
     """Returns the tag list.
@@ -11912,7 +12788,7 @@ class LUTestJqueue(NoHooksLU):
     # Wait for client to close
     try:
       try:
-        # pylint: disable-msg=E1101
+        # pylint: disable=E1101
         # Instance of '_socketobject' has no ... member
         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
         conn.recv(1)
@@ -12009,7 +12885,7 @@ class IAllocator(object):
       easy usage
 
   """
-  # pylint: disable-msg=R0902
+  # pylint: disable=R0902
   # lots of instance attributes
 
   def __init__(self, cfg, rpc, mode, **kwargs):
@@ -12024,9 +12900,8 @@ class IAllocator(object):
     self.hypervisor = None
     self.relocate_from = None
     self.name = None
-    self.evac_nodes = None
     self.instances = None
-    self.reloc_mode = None
+    self.evac_mode = None
     self.target_groups = []
     # computed fields
     self.required_nodes = None
@@ -12080,8 +12955,7 @@ class IAllocator(object):
       hypervisor_name = self.hypervisor
     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
-    elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
-                       constants.IALLOCATOR_MODE_MRELOC):
+    else:
       hypervisor_name = cluster_info.enabled_hypervisors[0]
 
     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
@@ -12159,8 +13033,8 @@ class IAllocator(object):
                                 nname)
         remote_info = nresult.payload
 
-        for attr in ['memory_total', 'memory_free', 'memory_dom0',
-                     'vg_size', 'vg_free', 'cpu_total']:
+        for attr in ["memory_total", "memory_free", "memory_dom0",
+                     "vg_size", "vg_free", "cpu_total"]:
           if attr not in remote_info:
             raise errors.OpExecError("Node '%s' didn't return attribute"
                                      " '%s'" % (nname, attr))
@@ -12176,21 +13050,21 @@ class IAllocator(object):
             if iinfo.name not in node_iinfo[nname].payload:
               i_used_mem = 0
             else:
-              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
+              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
-            remote_info['memory_free'] -= max(0, i_mem_diff)
+            remote_info["memory_free"] -= max(0, i_mem_diff)
 
             if iinfo.admin_up:
               i_p_up_mem += beinfo[constants.BE_MEMORY]
 
         # compute memory used by instances
         pnr_dyn = {
-          "total_memory": remote_info['memory_total'],
-          "reserved_memory": remote_info['memory_dom0'],
-          "free_memory": remote_info['memory_free'],
-          "total_disk": remote_info['vg_size'],
-          "free_disk": remote_info['vg_free'],
-          "total_cpus": remote_info['cpu_total'],
+          "total_memory": remote_info["memory_total"],
+          "reserved_memory": remote_info["memory_dom0"],
+          "free_memory": remote_info["memory_free"],
+          "total_disk": remote_info["vg_size"],
+          "free_disk": remote_info["vg_free"],
+          "total_cpus": remote_info["cpu_total"],
           "i_pri_memory": i_p_mem,
           "i_pri_up_memory": i_p_up_mem,
           }
@@ -12307,22 +13181,21 @@ class IAllocator(object):
       }
     return request
 
-  def _AddEvacuateNodes(self):
-    """Add evacuate nodes data to allocator structure.
+  def _AddNodeEvacuate(self):
+    """Get data for node-evacuate requests.
 
     """
-    request = {
-      "evac_nodes": self.evac_nodes
+    return {
+      "instances": self.instances,
+      "evac_mode": self.evac_mode,
       }
-    return request
 
-  def _AddMultiRelocate(self):
-    """Get data for multi-relocate requests.
+  def _AddChangeGroup(self):
+    """Get data for node-evacuate requests.
 
     """
     return {
       "instances": self.instances,
-      "reloc_mode": self.reloc_mode,
       "target_groups": self.target_groups,
       }
 
@@ -12348,6 +13221,28 @@ class IAllocator(object):
     self.in_text = serializer.Dump(self.in_data)
 
   _STRING_LIST = ht.TListOf(ht.TString)
+  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
+     # pylint: disable=E1101
+     # Class '...' has no 'OP_ID' member
+     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
+                          opcodes.OpInstanceMigrate.OP_ID,
+                          opcodes.OpInstanceReplaceDisks.OP_ID])
+     })))
+
+  _NEVAC_MOVED = \
+    ht.TListOf(ht.TAnd(ht.TIsLength(3),
+                       ht.TItems([ht.TNonEmptyString,
+                                  ht.TNonEmptyString,
+                                  ht.TListOf(ht.TNonEmptyString),
+                                 ])))
+  _NEVAC_FAILED = \
+    ht.TListOf(ht.TAnd(ht.TIsLength(2),
+                       ht.TItems([ht.TNonEmptyString,
+                                  ht.TMaybeString,
+                                 ])))
+  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
+                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
+
   _MODE_DATA = {
     constants.IALLOCATOR_MODE_ALLOC:
       (_AddNewInstance,
@@ -12366,22 +13261,16 @@ class IAllocator(object):
       (_AddRelocateInstance,
        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
        ht.TList),
-    constants.IALLOCATOR_MODE_MEVAC:
-      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
-       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
-    constants.IALLOCATOR_MODE_MRELOC:
-      (_AddMultiRelocate, [
+     constants.IALLOCATOR_MODE_NODE_EVAC:
+      (_AddNodeEvacuate, [
+        ("instances", _STRING_LIST),
+        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
+        ], _NEVAC_RESULT),
+     constants.IALLOCATOR_MODE_CHG_GROUP:
+      (_AddChangeGroup, [
         ("instances", _STRING_LIST),
-        ("reloc_mode", ht.TElemOf(constants.IALLOCATOR_MRELOC_MODES)),
         ("target_groups", _STRING_LIST),
-        ],
-       ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
-         # pylint: disable-msg=E1101
-         # Class '...' has no 'OP_ID' member
-         "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
-                              opcodes.OpInstanceMigrate.OP_ID,
-                              opcodes.OpInstanceReplaceDisks.OP_ID])
-         })))),
+        ], _NEVAC_RESULT),
     }
 
   def Run(self, name, validate=True, call_fn=None):
@@ -12430,39 +13319,28 @@ class IAllocator(object):
                                (self._result_check, self.result),
                                errors.ECODE_INVAL)
 
-    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
-                     constants.IALLOCATOR_MODE_MEVAC):
+    if self.mode == constants.IALLOCATOR_MODE_RELOC:
+      assert self.relocate_from is not None
+      assert self.required_nodes == 1
+
       node2group = dict((name, ndata["group"])
                         for (name, ndata) in self.in_data["nodes"].items())
 
       fn = compat.partial(self._NodesToGroups, node2group,
                           self.in_data["nodegroups"])
 
-      if self.mode == constants.IALLOCATOR_MODE_RELOC:
-        assert self.relocate_from is not None
-        assert self.required_nodes == 1
-
-        request_groups = fn(self.relocate_from)
-        result_groups = fn(rdict["result"])
-
-        if result_groups != request_groups:
-          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
-                                   " differ from original groups (%s)" %
-                                   (utils.CommaJoin(result_groups),
-                                    utils.CommaJoin(request_groups)))
-      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
-        request_groups = fn(self.evac_nodes)
-        for (instance_name, secnode) in self.result:
-          result_groups = fn([secnode])
-          if result_groups != request_groups:
-            raise errors.OpExecError("Iallocator returned new secondary node"
-                                     " '%s' (group '%s') for instance '%s'"
-                                     " which is not in original group '%s'" %
-                                     (secnode, utils.CommaJoin(result_groups),
-                                      instance_name,
-                                      utils.CommaJoin(request_groups)))
-      else:
-        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
+      instance = self.cfg.GetInstanceInfo(self.name)
+      request_groups = fn(self.relocate_from + [instance.primary_node])
+      result_groups = fn(rdict["result"] + [instance.primary_node])
+
+      if self.success and not set(result_groups).issubset(request_groups):
+        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
+                                 " differ from original groups (%s)" %
+                                 (utils.CommaJoin(result_groups),
+                                  utils.CommaJoin(request_groups)))
+
+    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
+      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
 
     self.out_data = rdict
 
@@ -12541,17 +13419,13 @@ class LUTestAllocator(NoHooksLU):
     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
       fname = _ExpandInstanceName(self.cfg, self.op.name)
       self.op.name = fname
-      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
-    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
-      if not hasattr(self.op, "evac_nodes"):
-        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
-                                   " opcode input", errors.ECODE_INVAL)
-    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
-      if self.op.instances:
-        self.op.instances = _GetWantedInstances(self, self.op.instances)
-      else:
-        raise errors.OpPrereqError("Missing instances to relocate",
-                                   errors.ECODE_INVAL)
+      self.relocate_from = \
+          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
+    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
+                          constants.IALLOCATOR_MODE_NODE_EVAC):
+      if not self.op.instances:
+        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
+      self.op.instances = _GetWantedInstances(self, self.op.instances)
     else:
       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                  self.op.mode, errors.ECODE_INVAL)
@@ -12587,16 +13461,16 @@ class LUTestAllocator(NoHooksLU):
                        name=self.op.name,
                        relocate_from=list(self.relocate_from),
                        )
-    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
+    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
       ial = IAllocator(self.cfg, self.rpc,
                        mode=self.op.mode,
-                       evac_nodes=self.op.evac_nodes)
-    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
+                       instances=self.op.instances,
+                       target_groups=self.op.target_groups)
+    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
       ial = IAllocator(self.cfg, self.rpc,
                        mode=self.op.mode,
                        instances=self.op.instances,
-                       reloc_mode=self.op.reloc_mode,
-                       target_groups=self.op.target_groups)
+                       evac_mode=self.op.evac_mode)
     else:
       raise errors.ProgrammerError("Uncatched mode %s in"
                                    " LUTestAllocator.Exec", self.op.mode)