gnt-cluster: Add hv/disk state to init

[ganeti-local] / lib / cmdlib.py
diff --git a/lib/cmdlib.py b/lib/cmdlib.py

index 512c228..04d26f9 100644 (file)
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -21,12 +21,12 @@
  
  """Module implementing the master-side code."""
  
-# pylint: disable-msg=W0201,C0302
+# pylint: disable=W0201,C0302
  
  # W0201 since most LU attributes are defined in CheckPrereq or similar
  # functions
  
-# C0302: since we have waaaay to many lines in this module
+# C0302: since we have waaaay too many lines in this module
  
  import os
  import os.path
@@ -40,6 +40,7 @@ import socket
  import tempfile
  import shutil
  import itertools
+import operator
  
  from ganeti import ssh
  from ganeti import utils
@@ -58,21 +59,20 @@ from ganeti import query
  from ganeti import qlang
  from ganeti import opcodes
  from ganeti import ht
+from ganeti import rpc
  
-import ganeti.masterd.instance # pylint: disable-msg=W0611
+import ganeti.masterd.instance # pylint: disable=W0611
  
  
-def _SupportsOob(cfg, node):
-  """Tells if node supports OOB.
-
-  @type cfg: L{config.ConfigWriter}
-  @param cfg: The cluster configuration
-  @type node: L{objects.Node}
-  @param node: The node
-  @return: The OOB script if supported or an empty string otherwise
+#: Size of DRBD meta block device
+DRBD_META_SIZE = 128
  
-  """
-  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
+# States of instance
+INSTANCE_UP = [constants.ADMINST_UP]
+INSTANCE_DOWN = [constants.ADMINST_DOWN]
+INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
+INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
+INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  
  
  class ResultWithJobs:
@@ -120,7 +120,7 @@ class LogicalUnit(object):
    HTYPE = None
    REQ_BGL = True
  
-  def __init__(self, processor, op, context, rpc):
+  def __init__(self, processor, op, context, rpc_runner):
      """Constructor for LogicalUnit.
  
      This needs to be overridden in derived classes in order to check op
@@ -131,8 +131,10 @@ class LogicalUnit(object):
      self.op = op
      self.cfg = context.cfg
      self.glm = context.glm
+    # readability alias
+    self.owned_locks = context.glm.list_owned
      self.context = context
-    self.rpc = rpc
+    self.rpc = rpc_runner
      # Dicts used to declare locking needs to mcpu
      self.needed_locks = None
      self.share_locks = dict.fromkeys(locking.LEVELS, 0)
@@ -141,10 +143,10 @@ class LogicalUnit(object):
      # Used to force good behavior when calling helper functions
      self.recalculate_locks = {}
      # logging
-    self.Log = processor.Log # pylint: disable-msg=C0103
-    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
-    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
-    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
+    self.Log = processor.Log # pylint: disable=C0103
+    self.LogWarning = processor.LogWarning # pylint: disable=C0103
+    self.LogInfo = processor.LogInfo # pylint: disable=C0103
+    self.LogStep = processor.LogStep # pylint: disable=C0103
      # support for dry-run
      self.dry_run_result = None
      # support for generic debug attribute
@@ -332,7 +334,7 @@ class LogicalUnit(object):
      """
      # API must be kept, thus we ignore the unused argument and could
      # be a function warnings
-    # pylint: disable-msg=W0613,R0201
+    # pylint: disable=W0613,R0201
      return lu_result
  
    def _ExpandAndLockInstance(self):
@@ -354,7 +356,8 @@ class LogicalUnit(object):
                                                  self.op.instance_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
  
-  def _LockInstancesNodes(self, primary_only=False):
+  def _LockInstancesNodes(self, primary_only=False,
+                          level=locking.LEVEL_NODE):
      """Helper function to declare instances' nodes for locking.
  
      This function should be called after locking one or more instances to lock
@@ -375,9 +378,10 @@ class LogicalUnit(object):
  
      @type primary_only: boolean
      @param primary_only: only lock primary nodes of locked instances
+    @param level: Which lock level to use for locking nodes
  
      """
-    assert locking.LEVEL_NODE in self.recalculate_locks, \
+    assert level in self.recalculate_locks, \
        "_LockInstancesNodes helper function called with no nodes to recalculate"
  
      # TODO: check if we're really been called with the instance locks held
@@ -386,21 +390,23 @@ class LogicalUnit(object):
      # future we might want to have different behaviors depending on the value
      # of self.recalculate_locks[locking.LEVEL_NODE]
      wanted_nodes = []
-    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
-      instance = self.context.cfg.GetInstanceInfo(instance_name)
+    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
+    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
        wanted_nodes.append(instance.primary_node)
        if not primary_only:
          wanted_nodes.extend(instance.secondary_nodes)
  
-    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
-      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
-    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
-      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
+    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
+      self.needed_locks[level] = wanted_nodes
+    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
+      self.needed_locks[level].extend(wanted_nodes)
+    else:
+      raise errors.ProgrammerError("Unknown recalculation mode")
  
-    del self.recalculate_locks[locking.LEVEL_NODE]
+    del self.recalculate_locks[level]
  
  
-class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
+class NoHooksLU(LogicalUnit): # pylint: disable=W0223
    """Simple LU which runs no hooks.
  
    This LU is intended as a parent for other LogicalUnits which will
@@ -478,13 +484,13 @@ class _QueryBase:
    #: Attribute holding field definitions
    FIELDS = None
  
-  def __init__(self, filter_, fields, use_locking):
+  def __init__(self, qfilter, fields, use_locking):
      """Initializes this class.
  
      """
      self.use_locking = use_locking
  
-    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
+    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                               namefield="name")
      self.requested_data = self.query.RequestedData()
      self.names = self.query.RequestedNames()
@@ -500,7 +506,7 @@ class _QueryBase:
  
      """
      if self.do_locking:
-      names = lu.glm.list_owned(lock_level)
+      names = lu.owned_locks(lock_level)
      else:
        names = all_names
  
@@ -560,6 +566,90 @@ class _QueryBase:
                                      sort_by_name=self.sort_by_name)
  
  
+def _ShareAll():
+  """Builds a lock-sharing declaration with every level marked as shared.
+
+  @rtype: dict
+  @return: mapping of each entry of L{locking.LEVELS} to the value 1
+
+  """
+  return dict((level, 1) for level in locking.LEVELS)
+
+
+def _MakeLegacyNodeInfo(data):
+  """Flattens a L{rpc.RpcRunner.call_node_info} result into one dictionary.
+
+  The input must carry exactly one volume group entry and exactly one
+  hypervisor entry; callers needing several of either cannot use this helper.
+
+  @param data: tuple of (boot ID, one-element sequence with the volume group
+    information, one-element sequence with the hypervisor information)
+  @return: dictionary joining the volume group information, the hypervisor
+    information and a C{"bootid"} entry
+
+  """
+  (bootid, vg_data, hv_data) = data
+  (vg_info, ) = vg_data
+  (hv_info, ) = hv_data
+
+  merged = utils.JoinDisjointDicts(vg_info, hv_info)
+
+  return utils.JoinDisjointDicts(merged, {"bootid": bootid})
+
+
+def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
+  """Checks if the owned node groups are still correct for an instance.
+
+  @type cfg: L{config.ConfigWriter}
+  @param cfg: The cluster configuration
+  @type instance_name: string
+  @param instance_name: Instance name
+  @type owned_groups: set or frozenset
+  @param owned_groups: List of currently owned node groups
+  @return: The instance's node groups as reported by the configuration
+  @raise errors.OpPrereqError: if the instance uses a node group which is
+    not among the owned ones
+
+  """
+  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
+
+  if not owned_groups.issuperset(inst_groups):
+    # The fragments below are concatenated into one message, hence only one
+    # of them may carry the word "are"
+    raise errors.OpPrereqError("Instance %s's node groups changed since"
+                               " locks were acquired, current groups are"
+                               " '%s', owning groups '%s'; retry the"
+                               " operation" %
+                               (instance_name,
+                                utils.CommaJoin(inst_groups),
+                                utils.CommaJoin(owned_groups)),
+                               errors.ECODE_STATE)
+
+  return inst_groups
+
+
+def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
+  """Verifies that the owned instances still match those of a node group.
+
+  @type cfg: L{config.ConfigWriter}
+  @param cfg: The cluster configuration
+  @type group_uuid: string
+  @param group_uuid: Node group UUID
+  @type owned_instances: set or frozenset
+  @param owned_instances: List of currently owned instances
+  @return: The instances of the node group as reported by the configuration
+  @raise errors.OpPrereqError: if the owned instances differ from the ones
+    reported by the configuration
+
+  """
+  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
+
+  if owned_instances != wanted_instances:
+    details = (group_uuid,
+               utils.CommaJoin(wanted_instances),
+               utils.CommaJoin(owned_instances))
+    msg = ("Instances in node group '%s' changed since locks were acquired,"
+           " wanted '%s', have '%s'; retry the operation" % details)
+    raise errors.OpPrereqError(msg, errors.ECODE_STATE)
+
+  return wanted_instances
+
+
+def _SupportsOob(cfg, node):
+  """Looks up the out-of-band program configured for a node.
+
+  @type cfg: L{config.ConfigWriter}
+  @param cfg: The cluster configuration
+  @type node: L{objects.Node}
+  @param node: The node
+  @return: The OOB script if supported or an empty string otherwise
+
+  """
+  ndparams = cfg.GetNdParams(node)
+
+  return ndparams[constants.ND_OOB_PROGRAM]
+
+
  def _GetWantedNodes(lu, nodes):
    """Returns list of checked and expanded node names.
  
@@ -631,6 +721,71 @@ def _GetUpdatedParams(old_params, update_dict,
    return params_copy
  
  
+def _UpdateAndVerifySubDict(base, updates, type_check):
+  """Merges per-key updates into a dict of dicts and type-checks the result.
+
+  For every key of C{updates} the matching sub dict of C{base} (or an empty
+  dict if there is none) is combined with the new values and then verified;
+  keys of C{base} not mentioned in C{updates} are carried over as copies.
+
+  @param base: The dict with the old data
+  @param updates: The dict with the new data
+  @param type_check: Dict suitable to ForceDictType to verify correct types
+  @return: A new dict with updated and verified values
+
+  """
+  result = copy.deepcopy(base)
+
+  for (key, value) in updates.items():
+    merged = _GetUpdatedParams(base.get(key, {}), value)
+    utils.ForceDictType(merged, type_check)
+    result[key] = merged
+
+  return result
+
+
+def _MergeAndVerifyHvState(op_input, obj_input):
+  """Merges an opcode's hypervisor state into the one stored on an object.
+
+  @param op_input: The input dict from the opcode
+  @param obj_input: The input dict from the objects
+  @return: The verified and updated dict, or None if the opcode carries no
+    hypervisor state
+  @raise errors.OpPrereqError: if the opcode names a hypervisor which is not
+    in L{constants.HYPER_TYPES}
+
+  """
+  if not op_input:
+    return None
+
+  unknown_hvs = set(op_input) - constants.HYPER_TYPES
+  if unknown_hvs:
+    raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
+                               " %s" % utils.CommaJoin(unknown_hvs),
+                               errors.ECODE_INVAL)
+
+  current = obj_input
+  if current is None:
+    current = {}
+
+  return _UpdateAndVerifySubDict(current, op_input,
+                                 constants.HVSTS_PARAMETER_TYPES)
+
+
+def _MergeAndVerifyDiskState(op_input, obj_input):
+  """Merges an opcode's disk state into the one stored on an object.
+
+  Only the storage types named by the opcode appear in the result.
+
+  @param op_input: The input dict from the opcode
+  @param obj_input: The input dict from the objects
+  @return: The verified and updated dict, or None if the opcode carries no
+    disk state
+  @raise errors.OpPrereqError: if the opcode names a storage type which is
+    not in L{constants.DS_VALID_TYPES}
+
+  """
+  if not op_input:
+    return None
+
+  unknown_types = set(op_input) - constants.DS_VALID_TYPES
+  if unknown_types:
+    raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
+                               utils.CommaJoin(unknown_types),
+                               errors.ECODE_INVAL)
+
+  type_check = constants.DSS_PARAMETER_TYPES
+  current = obj_input
+  if current is None:
+    current = {}
+
+  merged = {}
+  for (storage, state) in op_input.items():
+    merged[storage] = _UpdateAndVerifySubDict(current.get(storage, {}),
+                                              state, type_check)
+
+  return merged
+
+
  def _ReleaseLocks(lu, level, names=None, keep=None):
    """Releases locks owned by an LU.
  
@@ -652,23 +807,28 @@ def _ReleaseLocks(lu, level, names=None, keep=None):
    else:
      should_release = None
  
-  if should_release:
+  owned = lu.owned_locks(level)
+  if not owned:
+    # Not owning any lock at this level, do nothing
+    pass
+
+  elif should_release:
      retain = []
      release = []
  
      # Determine which locks to release
-    for name in lu.glm.list_owned(level):
+    for name in owned:
        if should_release(name):
          release.append(name)
        else:
          retain.append(name)
  
-    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
+    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
  
      # Release just some locks
      lu.glm.release(level, names=release)
  
-    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
+    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
    else:
      # Release everything
      lu.glm.release(level)
@@ -676,15 +836,28 @@ def _ReleaseLocks(lu, level, names=None, keep=None):
      assert not lu.glm.is_owned(level), "No locks should be owned"
  
  
+def _MapInstanceDisksToNodes(instances):
+  """Builds a lookup table from (node, volume) pairs to instance names.
+
+  The pairs are taken from each instance's C{MapLVsByNode()} result.
+
+  @type instances: list of L{objects.Instance}
+  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
+
+  """
+  owners = {}
+
+  for inst in instances:
+    for (node, vols) in inst.MapLVsByNode().items():
+      for vol in vols:
+        owners[(node, vol)] = inst.name
+
+  return owners
+
+
  def _RunPostHook(lu, node_name):
    """Runs the post-hook for an opcode on a single node.
  
    """
-  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
+  hm = lu.proc.BuildHooksManager(lu)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
    except:
-    # pylint: disable-msg=W0702
+    # pylint: disable=W0702
      lu.LogWarning("Errors occurred running hooks on %s" % node_name)
  
  
@@ -816,20 +989,51 @@ def _GetClusterDomainSecret():
                                 strict=True)
  
  
-def _CheckInstanceDown(lu, instance, reason):
-  """Ensure that an instance is not running."""
-  if instance.admin_up:
-    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
-                               (instance.name, reason), errors.ECODE_STATE)
+def _CheckInstanceState(lu, instance, req_states, msg=None):
+  """Ensure that an instance is in one of the required states.
+
+  If the "up" state is not among the required ones, the primary node is
+  additionally asked which instances it runs, and the check fails if the
+  instance is among them.
+
+  @param lu: the LU on behalf of which we make the check
+  @param instance: the instance to check
+  @param req_states: sequence of admin states the instance is allowed to be in
+  @param msg: if passed, should be a message to replace the default one
+  @raise errors.OpPrereqError: if the instance is not in the required state
+
+  """
+  if msg is None:
+    msg = "can't use instance from outside %s states" % ", ".join(req_states)
+  if instance.admin_state not in req_states:
+    # Report the instance by name, like the "is running" error further down
+    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
+                               (instance.name, instance.admin_state, msg),
+                               errors.ECODE_STATE)
+
+  if constants.ADMINST_UP not in req_states:
+    # The configured state alone is not enough here: query the primary node
+    # for the instances it actually runs
+    pnode = instance.primary_node
+    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
+    ins_l.Raise("Can't contact node %s for instance information" % pnode,
+                prereq=True, ecode=errors.ECODE_ENVIRON)
+
+    if instance.name in ins_l.payload:
+      raise errors.OpPrereqError("Instance %s is running, %s" %
+                                 (instance.name, msg), errors.ECODE_STATE)
+
+
+def _CheckMinMaxSpecs(name, ipolicy, value):
+  """Checks if value is in the desired range.
  
-  pnode = instance.primary_node
-  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
-  ins_l.Raise("Can't contact node %s for instance information" % pnode,
-              prereq=True, ecode=errors.ECODE_ENVIRON)
+  @param name: name of the parameter for which we perform the check
+  @param ipolicy: dictionary containing min, max and std values
+  @param value: actual value that we want to use
+  @return: None or element not meeting the criteria
  
-  if instance.name in ins_l.payload:
-    raise errors.OpPrereqError("Instance %s is running, %s" %
-                               (instance.name, reason), errors.ECODE_STATE)
+
+  """
+  if value in [None, constants.VALUE_AUTO]:
+    return None
+  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
+  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
+  if value > max_v or min_v > value:
+    return ("%s value %s is not in range [%s, %s]" %
+            (name, value, min_v, max_v))
+  return None
  
  
  def _ExpandItemName(fn, name, kind):
@@ -860,8 +1064,8 @@ def _ExpandInstanceName(cfg, name):
  
  
  def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
-                          memory, vcpus, nics, disk_template, disks,
-                          bep, hvp, hypervisor_name):
+                          minmem, maxmem, vcpus, nics, disk_template, disks,
+                          bep, hvp, hypervisor_name, tags):
    """Builds instance related env variables for hooks
  
    This builds the hook environment from individual variables.
@@ -874,10 +1078,12 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
    @param secondary_nodes: list of secondary nodes as strings
    @type os_type: string
    @param os_type: the name of the instance's OS
-  @type status: boolean
-  @param status: the should_run status of the instance
-  @type memory: string
-  @param memory: the memory size of the instance
+  @type status: string
+  @param status: the desired status of the instance
+  @type minmem: string
+  @param minmem: the minimum memory size of the instance
+  @type maxmem: string
+  @param maxmem: the maximum memory size of the instance
    @type vcpus: string
    @param vcpus: the count of VCPUs the instance has
    @type nics: list
@@ -893,27 +1099,27 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
    @param hvp: the hypervisor parameters for the instance
    @type hypervisor_name: string
    @param hypervisor_name: the hypervisor for the instance
+  @type tags: list
+  @param tags: list of instance tags as strings
    @rtype: dict
    @return: the hook environment for this instance
  
    """
-  if status:
-    str_status = "up"
-  else:
-    str_status = "down"
    env = {
      "OP_TARGET": name,
      "INSTANCE_NAME": name,
      "INSTANCE_PRIMARY": primary_node,
      "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
      "INSTANCE_OS_TYPE": os_type,
-    "INSTANCE_STATUS": str_status,
-    "INSTANCE_MEMORY": memory,
+    "INSTANCE_STATUS": status,
+    "INSTANCE_MINMEM": minmem,
+    "INSTANCE_MAXMEM": maxmem,
+    # TODO(2.7) remove deprecated "memory" value
+    "INSTANCE_MEMORY": maxmem,
      "INSTANCE_VCPUS": vcpus,
      "INSTANCE_DISK_TEMPLATE": disk_template,
      "INSTANCE_HYPERVISOR": hypervisor_name,
    }
-
    if nics:
      nic_count = len(nics)
      for idx, (ip, mac, mode, link) in enumerate(nics):
@@ -940,6 +1146,11 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
  
    env["INSTANCE_DISK_COUNT"] = disk_count
  
+  if not tags:
+    tags = []
+
+  env["INSTANCE_TAGS"] = " ".join(tags)
+
    for source, kind in [(bep, "BE"), (hvp, "HV")]:
      for key, value in source.items():
        env["INSTANCE_%s_%s" % (kind, key)] = value
@@ -990,23 +1201,25 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
    bep = cluster.FillBE(instance)
    hvp = cluster.FillHV(instance)
    args = {
-    'name': instance.name,
-    'primary_node': instance.primary_node,
-    'secondary_nodes': instance.secondary_nodes,
-    'os_type': instance.os,
-    'status': instance.admin_up,
-    'memory': bep[constants.BE_MEMORY],
-    'vcpus': bep[constants.BE_VCPUS],
-    'nics': _NICListToTuple(lu, instance.nics),
-    'disk_template': instance.disk_template,
-    'disks': [(disk.size, disk.mode) for disk in instance.disks],
-    'bep': bep,
-    'hvp': hvp,
-    'hypervisor_name': instance.hypervisor,
+    "name": instance.name,
+    "primary_node": instance.primary_node,
+    "secondary_nodes": instance.secondary_nodes,
+    "os_type": instance.os,
+    "status": instance.admin_state,
+    "maxmem": bep[constants.BE_MAXMEM],
+    "minmem": bep[constants.BE_MINMEM],
+    "vcpus": bep[constants.BE_VCPUS],
+    "nics": _NICListToTuple(lu, instance.nics),
+    "disk_template": instance.disk_template,
+    "disks": [(disk.size, disk.mode) for disk in instance.disks],
+    "bep": bep,
+    "hvp": hvp,
+    "hypervisor_name": instance.hypervisor,
+    "tags": instance.tags,
    }
    if override:
      args.update(override)
-  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
+  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
  
  
  def _AdjustCandidatePool(lu, exceptions):
@@ -1036,6 +1249,14 @@ def _DecideSelfPromotion(lu, exceptions=None):
    return mc_now < mc_should
  
  
+def _CalculateGroupIPolicy(cfg, group):
+  """Computes the instance policy of a node group.
+
+  @param cfg: cluster configuration, used to retrieve the cluster object
+  @param group: node group whose C{ipolicy} attribute is filled
+  @return: result of the cluster's C{SimpleFillIPolicy} for the group policy
+
+  """
+  return cfg.GetClusterInfo().SimpleFillIPolicy(group.ipolicy)
+
+
  def _CheckNicsBridgesExist(lu, target_nics, target_node):
    """Check that the brigdes needed by a list of nics exist.
  
@@ -1068,9 +1289,13 @@ def _CheckOSVariant(os_obj, name):
    @param name: OS name passed by the user, to check for validity
  
    """
+  variant = objects.OS.GetVariant(name)
    if not os_obj.supported_variants:
+    if variant:
+      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
+                                 " passed)" % (os_obj.name, variant),
+                                 errors.ECODE_INVAL)
      return
-  variant = objects.OS.GetVariant(name)
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)
@@ -1119,13 +1344,13 @@ def _GetStorageTypeArgs(cfg, storage_type):
    return []
  
  
-def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
+def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
    faulty = []
  
    for dev in instance.disks:
      cfg.SetDiskID(dev, node_name)
  
-  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
+  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
    result.Raise("Failed to get disk status from node %s" % node_name,
                 prereq=prereq, ecode=errors.ECODE_ENVIRON)
  
@@ -1168,6 +1393,29 @@ def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
                                   " iallocator")
  
  
+def _GetDefaultIAllocator(cfg, iallocator):
+  """Picks the iallocator to use, falling back to the cluster default.
+
+  @type cfg: L{config.ConfigWriter}
+  @param cfg: Cluster configuration object
+  @type iallocator: string or None
+  @param iallocator: Iallocator specified in opcode
+  @rtype: string
+  @return: Iallocator name
+  @raise errors.OpPrereqError: if neither the opcode nor the cluster
+    configuration names an iallocator
+
+  """
+  # The cluster-wide default is only consulted if the opcode gave none
+  chosen = iallocator or cfg.GetDefaultIAllocator()
+
+  if chosen:
+    return chosen
+
+  raise errors.OpPrereqError("No iallocator was specified, neither in the"
+                             " opcode nor as a cluster-wide default",
+                             errors.ECODE_INVAL)
+
+
  class LUClusterPostInit(LogicalUnit):
    """Logical unit for running hooks after cluster initialization.
  
@@ -1242,19 +1490,21 @@ class LUClusterDestroy(LogicalUnit):
      """Destroys the cluster.
  
      """
-    master = self.cfg.GetMasterNode()
+    master_params = self.cfg.GetMasterNetworkParameters()
  
      # Run post hooks on master node before it's removed
-    _RunPostHook(self, master)
+    _RunPostHook(self, master_params.name)
  
-    result = self.rpc.call_node_stop_master(master, False)
+    ems = self.cfg.GetUseExternalMipScript()
+    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+                                                     master_params, ems)
      result.Raise("Could not disable the master role")
  
-    return master
+    return master_params.name
  
  
  def _VerifyCertificate(filename):
-  """Verifies a certificate for LUClusterVerifyConfig.
+  """Verifies a certificate for L{LUClusterVerifyConfig}.
  
    @type filename: string
    @param filename: Path to PEM file
@@ -1263,7 +1513,7 @@ def _VerifyCertificate(filename):
    try:
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             utils.ReadFile(filename))
-  except Exception, err: # pylint: disable-msg=W0703
+  except Exception, err: # pylint: disable=W0703
      return (LUClusterVerifyConfig.ETYPE_ERROR,
              "Failed to load X509 certificate %s: %s" % (filename, err))
  
@@ -1325,39 +1575,6 @@ class _VerifyErrors(object):
    self.op and self._feedback_fn to be available.)
  
    """
-  TCLUSTER = "cluster"
-  TNODE = "node"
-  TINSTANCE = "instance"
-
-  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
-  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
-  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
-  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
-  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
-  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
-  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
-  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
-  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
-  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
-  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
-  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
-  ENODEDRBD = (TNODE, "ENODEDRBD")
-  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
-  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
-  ENODEHOOKS = (TNODE, "ENODEHOOKS")
-  ENODEHV = (TNODE, "ENODEHV")
-  ENODELVM = (TNODE, "ENODELVM")
-  ENODEN1 = (TNODE, "ENODEN1")
-  ENODENET = (TNODE, "ENODENET")
-  ENODEOS = (TNODE, "ENODEOS")
-  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
-  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
-  ENODERPC = (TNODE, "ENODERPC")
-  ENODESSH = (TNODE, "ENODESSH")
-  ENODEVERSION = (TNODE, "ENODEVERSION")
-  ENODESETUP = (TNODE, "ENODESETUP")
-  ENODETIME = (TNODE, "ENODETIME")
-  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
  
    ETYPE_FIELD = "code"
    ETYPE_ERROR = "ERROR"
@@ -1373,12 +1590,12 @@ class _VerifyErrors(object):
  
      """
      ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
-    itype, etxt = ecode
+    itype, etxt, _ = ecode
      # first complete the msg
      if args:
        msg = msg % args
      # then format the whole message
-    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
+    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
        msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
      else:
        if item:
@@ -1387,26 +1604,78 @@ class _VerifyErrors(object):
          item = ""
        msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
      # and finally report it via the feedback_fn
-    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
+    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
  
-  def _ErrorIf(self, cond, *args, **kwargs):
+  def _ErrorIf(self, cond, ecode, *args, **kwargs):
      """Log an error message if the passed condition is True.
  
      """
      cond = (bool(cond)
-            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
+            or self.op.debug_simulate_errors) # pylint: disable=E1101
+
+    # If the error code is in the list of ignored errors, demote the error to a
+    # warning
+    (_, etxt, _) = ecode
+    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
+      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
+
      if cond:
-      self._Error(*args, **kwargs)
+      self._Error(ecode, *args, **kwargs)
+
      # do not mark the operation as failed for WARN cases only
      if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
        self.bad = self.bad or cond
  
  
+class LUClusterVerify(NoHooksLU):
+  """Submits all jobs necessary to verify the cluster.
+
+  The LU performs no verification itself: it only builds a list of jobs (one
+  opcode per job) and hands them back wrapped in a L{ResultWithJobs}.
+
+  """
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    """Declares that no locks are needed by this LU.
+
+    """
+    self.needed_locks = {}
+
+  def Exec(self, feedback_fn):
+    """Builds the verification jobs.
+
+    If the opcode names a group, a single group verification job is built.
+    Otherwise a configuration verification job is built first, followed by
+    one group verification job for every entry of
+    C{self.cfg.GetNodeGroupList()}.
+
+    @param feedback_fn: not used by this method
+    @return: L{ResultWithJobs} holding the list of jobs
+
+    """
+    jobs = []
+
+    if self.op.group_name:
+      groups = [self.op.group_name]
+      # Single group requested: its job gets no dependencies
+      depends_fn = lambda: None
+    else:
+      groups = self.cfg.GetNodeGroupList()
+
+      # Verify global configuration
+      jobs.append([
+        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
+        ])
+
+      # Always depend on global verification
+      # NOTE(review): the value is computed from len(jobs) at the moment the
+      # function is called; presumably the negative number is a relative
+      # reference back to the configuration job -- confirm against the job
+      # dependency semantics
+      depends_fn = lambda: [(-len(jobs), [])]
+
+    # depends_fn is called once per group while the generator is consumed;
+    # this relies on "jobs" growing between calls, so do not turn the
+    # generator into a precomputed list without checking the resulting values
+    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
+                                            ignore_errors=self.op.ignore_errors,
+                                            depends=depends_fn())]
+                for group in groups)
+
+    # Fix up all parameters
+    for op in itertools.chain(*jobs): # pylint: disable=W0142
+      op.debug_simulate_errors = self.op.debug_simulate_errors
+      op.verbose = self.op.verbose
+      op.error_codes = self.op.error_codes
+      try:
+        op.skip_checks = self.op.skip_checks
+      except AttributeError:
+        # Tolerated only for opcodes other than the group verification one
+        # (presumably those not defining "skip_checks" -- verify against the
+        # opcode definitions)
+        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
+
+    return ResultWithJobs(jobs)
+
+
  class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
    """Verifies the cluster config.
  
    """
-  REQ_BGL = False
+  REQ_BGL = True
  
    def _VerifyHVP(self, hvp_data):
      """Verifies locally the syntax of the hypervisor parameters.
@@ -1420,9 +1689,12 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
        except errors.GenericError, err:
-        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
+        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
  
    def ExpandNames(self):
+    # Information can be safely retrieved as the BGL is acquired in exclusive
+    # mode
+    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
      self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
      self.all_node_info = self.cfg.GetAllNodesInfo()
      self.all_inst_info = self.cfg.GetAllInstancesInfo()
@@ -1438,13 +1710,13 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
      feedback_fn("* Verifying cluster config")
  
      for msg in self.cfg.VerifyConfig():
-      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
+      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
  
      feedback_fn("* Verifying cluster certificate files")
  
      for cert_filename in constants.ALL_CERT_FILES:
        (errcode, msg) = _VerifyCertificate(cert_filename)
-      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
+      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
  
      feedback_fn("* Verifying hypervisor parameters")
  
@@ -1454,7 +1726,7 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
      feedback_fn("* Verifying all nodes belong to an existing group")
  
      # We do this verification here because, should this bogus circumstance
-    # occur, it would never be catched by VerifyGroup, which only acts on
+    # occur, it would never be caught by VerifyGroup, which only acts on
      # nodes/instances reachable from existing node groups.
  
      dangling_nodes = set(node.name for node in self.all_node_info.values()
@@ -1476,15 +1748,17 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
                                                  ["no instances"])))
          for node in dangling_nodes]
  
-    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
+    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
+                  None,
                    "the following nodes (and their instances) belong to a non"
                    " existing group: %s", utils.CommaJoin(pretty_dangling))
  
-    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
+    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
+                  None,
                    "the following instances have a non-existing primary-node:"
                    " %s", utils.CommaJoin(no_node_instances))
  
-    return (not self.bad, [g.name for g in self.all_group_info.values()])
+    return not self.bad
  
  
  class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
@@ -1551,60 +1825,60 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      # This raises errors.OpPrereqError on its own:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
  
-    all_node_info = self.cfg.GetAllNodesInfo()
-    all_inst_info = self.cfg.GetAllInstancesInfo()
+    # Get instances in node group; no locks held yet, CheckPrereq re-verifies
+    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
  
-    node_names = set(node.name
-                     for node in all_node_info.values()
-                     if node.group == self.group_uuid)
+    self.needed_locks = {
+      locking.LEVEL_INSTANCE: inst_names,
+      locking.LEVEL_NODEGROUP: [self.group_uuid],
+      locking.LEVEL_NODE: [],
+      }
  
-    inst_names = [inst.name
-                  for inst in all_inst_info.values()
-                  if inst.primary_node in node_names]
+    self.share_locks = _ShareAll()
  
-    # In Exec(), we warn about mirrored instances that have primary and
-    # secondary living in separate node groups. To fully verify that
-    # volumes for these instances are healthy, we will need to do an
-    # extra call to their secondaries. We ensure here those nodes will
-    # be locked.
-    for inst in inst_names:
-      if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
-        node_names.update(all_inst_info[inst].secondary_nodes)
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODE:
+      # Get group members; node locks not held yet, so CheckPrereq re-checks
+      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
  
-    self.needed_locks = {
-      locking.LEVEL_NODEGROUP: [self.group_uuid],
-      locking.LEVEL_NODE: list(node_names),
-      locking.LEVEL_INSTANCE: inst_names,
-    }
+      all_inst_info = self.cfg.GetAllInstancesInfo()
  
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+      # In Exec(), we warn about mirrored instances that have primary and
+      # secondary living in separate node groups. To fully verify that
+      # volumes for these instances are healthy, we will need to do an
+      # extra call to their secondaries. We ensure here those nodes will
+      # be locked.
+      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
+        # Important: access only the instances whose lock is owned
+        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
+          nodes.update(all_inst_info[inst].secondary_nodes)
  
-  def CheckPrereq(self):
-    self.all_node_info = self.cfg.GetAllNodesInfo()
-    self.all_inst_info = self.cfg.GetAllInstancesInfo()
+      self.needed_locks[locking.LEVEL_NODE] = nodes
  
-    group_nodes = set(node.name
-                      for node in self.all_node_info.values()
-                      if node.group == self.group_uuid)
+  def CheckPrereq(self):
+    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
+    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
  
-    group_instances = set(inst.name
-                          for inst in self.all_inst_info.values()
-                          if inst.primary_node in group_nodes)
+    group_nodes = set(self.group_info.members)
+    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
  
      unlocked_nodes = \
-        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
+        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
  
      unlocked_instances = \
-        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
+        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
  
      if unlocked_nodes:
-      raise errors.OpPrereqError("missing lock for nodes: %s" %
+      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                   utils.CommaJoin(unlocked_nodes))
  
      if unlocked_instances:
-      raise errors.OpPrereqError("missing lock for instances: %s" %
+      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                   utils.CommaJoin(unlocked_instances))
  
+    self.all_node_info = self.cfg.GetAllNodesInfo()
+    self.all_inst_info = self.cfg.GetAllInstancesInfo()
+
      self.my_node_names = utils.NiceSort(group_nodes)
      self.my_inst_names = utils.NiceSort(group_instances)
  
@@ -1626,7 +1900,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
              extra_lv_nodes.add(nname)
  
      unlocked_lv_nodes = \
-        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
+        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
  
      if unlocked_lv_nodes:
        raise errors.OpPrereqError("these nodes could be locked: %s" %
@@ -1649,11 +1923,11 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      """
      node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      # main result, nresult should be a non-empty dict
      test = not nresult or not isinstance(nresult, dict)
-    _ErrorIf(test, self.ENODERPC, node,
+    _ErrorIf(test, constants.CV_ENODERPC, node,
                    "unable to verify node: no data returned")
      if test:
        return False
@@ -1664,13 +1938,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      test = not (remote_version and
                  isinstance(remote_version, (list, tuple)) and
                  len(remote_version) == 2)
-    _ErrorIf(test, self.ENODERPC, node,
+    _ErrorIf(test, constants.CV_ENODERPC, node,
               "connection to node returned invalid data")
      if test:
        return False
  
      test = local_version != remote_version[0]
-    _ErrorIf(test, self.ENODEVERSION, node,
+    _ErrorIf(test, constants.CV_ENODEVERSION, node,
               "incompatible protocol versions: master %s,"
               " node %s", local_version, remote_version[0])
      if test:
@@ -1680,7 +1954,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      # full package version
      self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
-                  self.ENODEVERSION, node,
+                  constants.CV_ENODEVERSION, node,
                    "software version mismatch: master %s, node %s",
                    constants.RELEASE_VERSION, remote_version[1],
                    code=self.ETYPE_WARNING)
@@ -1689,19 +1963,19 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      if ninfo.vm_capable and isinstance(hyp_result, dict):
        for hv_name, hv_result in hyp_result.iteritems():
          test = hv_result is not None
-        _ErrorIf(test, self.ENODEHV, node,
+        _ErrorIf(test, constants.CV_ENODEHV, node,
                   "hypervisor %s verify failure: '%s'", hv_name, hv_result)
  
      hvp_result = nresult.get(constants.NV_HVPARAMS, None)
      if ninfo.vm_capable and isinstance(hvp_result, list):
        for item, hv_name, hv_result in hvp_result:
-        _ErrorIf(True, self.ENODEHV, node,
+        _ErrorIf(True, constants.CV_ENODEHV, node,
                   "hypervisor %s parameter verify failure (source %s): %s",
                   hv_name, item, hv_result)
  
      test = nresult.get(constants.NV_NODESETUP,
                         ["Missing NODESETUP results"])
-    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
+    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
               "; ".join(test))
  
      return True
@@ -1718,13 +1992,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      """
      node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
-      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
+      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
        return
  
      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
@@ -1734,7 +2008,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      else:
        ntime_diff = None
  
-    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
+    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
               "Node time diverges by at least %s from master node time",
               ntime_diff)
  
@@ -1751,29 +2025,30 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        return
  
      node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      # checks vg existence and size > 20G
      vglist = nresult.get(constants.NV_VGLIST, None)
      test = not vglist
-    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
+    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
-      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
+      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
  
      # check pv names
      pvlist = nresult.get(constants.NV_PVLIST, None)
      test = pvlist is None
-    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
+    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
-        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
-                 " '%s' of VG '%s'", pvname, owner_vg)
+        _ErrorIf(test, constants.CV_ENODELVM, node,
+                 "Invalid character ':' in PV '%s' of VG '%s'",
+                 pvname, owner_vg)
  
    def _VerifyNodeBridges(self, ninfo, nresult, bridges):
      """Check the node bridges.
@@ -1788,15 +2063,35 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        return
  
      node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      missing = nresult.get(constants.NV_BRIDGES, None)
      test = not isinstance(missing, list)
-    _ErrorIf(test, self.ENODENET, node,
+    _ErrorIf(test, constants.CV_ENODENET, node,
               "did not return valid bridge information")
      if not test:
-      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
-               utils.CommaJoin(sorted(missing)))
+      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
+               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
+
+  def _VerifyNodeUserScripts(self, ninfo, nresult):
+    """Check the results of user scripts presence and executability on the node
+
+    @type ninfo: L{objects.Node}
+    @param ninfo: the node to check
+    @param nresult: the remote results for the node
+
+    """
+    node = ninfo.name
+
+    test = constants.NV_USERSCRIPTS not in nresult
+    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
+                  "did not return user scripts information")
+    if not test:
+      # The payload lists the scripts that are missing or not executable
+      broken_scripts = nresult[constants.NV_USERSCRIPTS]
+      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
+                    "user scripts not present or not executable: %s" %
+                    utils.CommaJoin(sorted(broken_scripts)))
  
    def _VerifyNodeNetwork(self, ninfo, nresult):
      """Check the node network connectivity results.
@@ -1807,30 +2102,30 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      """
      node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      test = constants.NV_NODELIST not in nresult
-    _ErrorIf(test, self.ENODESSH, node,
+    _ErrorIf(test, constants.CV_ENODESSH, node,
               "node hasn't returned node ssh connectivity data")
      if not test:
        if nresult[constants.NV_NODELIST]:
          for a_node, a_msg in nresult[constants.NV_NODELIST].items():
-          _ErrorIf(True, self.ENODESSH, node,
+          _ErrorIf(True, constants.CV_ENODESSH, node,
                     "ssh communication with node '%s': %s", a_node, a_msg)
  
      test = constants.NV_NODENETTEST not in nresult
-    _ErrorIf(test, self.ENODENET, node,
+    _ErrorIf(test, constants.CV_ENODENET, node,
               "node hasn't returned node tcp connectivity data")
      if not test:
        if nresult[constants.NV_NODENETTEST]:
          nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
          for anode in nlist:
-          _ErrorIf(True, self.ENODENET, node,
+          _ErrorIf(True, constants.CV_ENODENET, node,
                     "tcp communication with node '%s': %s",
                     anode, nresult[constants.NV_NODENETTEST][anode])
  
      test = constants.NV_MASTERIP not in nresult
-    _ErrorIf(test, self.ENODENET, node,
+    _ErrorIf(test, constants.CV_ENODENET, node,
               "node hasn't returned node master IP reachability data")
      if not test:
        if not nresult[constants.NV_MASTERIP]:
@@ -1838,7 +2133,35 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
            msg = "the master node cannot reach the master IP (not configured?)"
          else:
            msg = "cannot reach the master IP"
-        _ErrorIf(True, self.ENODENET, node, msg)
+        _ErrorIf(True, constants.CV_ENODENET, node, msg)
+
+  def _VerifyInstancePolicy(self, instance):
+    """Verify instance specs against instance policy set on node group level.
+
+    @param instance: instance whose memory, CPU, disk and NIC specs are checked
+
+    """
+    cluster = self.cfg.GetClusterInfo()
+    full_beparams = cluster.FillBE(instance)
+    ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
+
+    mem_size = full_beparams.get(constants.BE_MAXMEM, None)
+    cpu_count = full_beparams.get(constants.BE_VCPUS, None)
+
+    # One (spec name, value) pair per check; every disk is checked on its own
+    test_settings = [
+      (constants.ISPEC_MEM_SIZE, mem_size),
+      (constants.ISPEC_CPU_COUNT, cpu_count),
+      (constants.ISPEC_DISK_COUNT, len(instance.disks)),
+      (constants.ISPEC_NIC_COUNT, len(instance.nics)),
+      ] + [(constants.ISPEC_DISK_SIZE, disk.size) for disk in instance.disks]
+
+    for (name, value) in test_settings:
+      # A non-None result is the violation message to report
+      test_result = _CheckMinMaxSpecs(name, ipolicy, value)
+      self._ErrorIf(test_result is not None,
+                    constants.CV_EINSTANCEPOLICY, instance.name,
+                    test_result)
  
    def _VerifyInstance(self, instance, instanceconfig, node_image,
                        diskstatus):
@@ -1848,12 +2171,14 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      available on the instance's node.
  
      """
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
      node_current = instanceconfig.primary_node
  
      node_vol_should = {}
      instanceconfig.MapLVsByNode(node_vol_should)
  
+    self._VerifyInstancePolicy(instanceconfig)
+
      for node in node_vol_should:
        n_img = node_image[node]
        if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
@@ -1861,13 +2186,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          continue
        for volume in node_vol_should[node]:
          test = volume not in n_img.volumes
-        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
+        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                   "volume %s missing on node %s", volume, node)
  
-    if instanceconfig.admin_up:
+    if instanceconfig.admin_state == constants.ADMINST_UP:
        pri_img = node_image[node_current]
        test = instance not in pri_img.instances and not pri_img.offline
-      _ErrorIf(test, self.EINSTANCEDOWN, instance,
+      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
                 "instance not running on its primary node %s",
                 node_current)
  
@@ -1880,13 +2205,14 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        # node here
        snode = node_image[nname]
        bad_snode = snode.ghost or snode.offline
-      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
-               self.EINSTANCEFAULTYDISK, instance,
+      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
+               not success and not bad_snode,
+               constants.CV_EINSTANCEFAULTYDISK, instance,
                 "couldn't retrieve status for disk/%s on %s: %s",
                 idx, nname, bdev_status)
-      _ErrorIf((instanceconfig.admin_up and success and
-                bdev_status.ldisk_status == constants.LDS_FAULTY),
-               self.EINSTANCEFAULTYDISK, instance,
+      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
+                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
+               constants.CV_EINSTANCEFAULTYDISK, instance,
                 "disk/%s on %s is faulty", idx, nname)
  
    def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
@@ -1907,7 +2233,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          test = ((node not in node_vol_should or
                  volume not in node_vol_should[node]) and
                  not reserved.Matches(volume))
-        self._ErrorIf(test, self.ENODEORPHANLV, node,
+        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                        "volume %s is unknown", volume)
  
    def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
@@ -1933,21 +2259,23 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          # we already list instances living on such nodes, and that's
          # enough warning
          continue
+      # TODO(dynmem): use MINMEM for checking
+      # TODO(dynmem): also consider ballooning out other instances
        for prinode, instances in n_img.sbp.items():
          needed_mem = 0
          for instance in instances:
            bep = cluster_info.FillBE(instance_cfg[instance])
            if bep[constants.BE_AUTO_BALANCE]:
-            needed_mem += bep[constants.BE_MEMORY]
+            needed_mem += bep[constants.BE_MAXMEM]
          test = n_img.mfree < needed_mem
-        self._ErrorIf(test, self.ENODEN1, node,
+        self._ErrorIf(test, constants.CV_ENODEN1, node,
                        "not enough memory to accomodate instance failovers"
                        " should node %s fail (%dMiB needed, %dMiB available)",
                        prinode, needed_mem, n_img.mfree)
  
    @classmethod
    def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
-                   (files_all, files_all_opt, files_mc, files_vm)):
+                   (files_all, files_opt, files_mc, files_vm)):
      """Verifies file checksums collected from all nodes.
  
      @param errorif: Callback for reporting errors
@@ -1956,25 +2284,35 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      @param all_nvinfo: RPC results
  
      """
-    node_names = frozenset(node.name for node in nodeinfo)
+    # Define functions determining which nodes to consider for a file
+    files2nodefn = [
+      (files_all, None),
+      (files_mc, lambda node: (node.master_candidate or
+                               node.name == master_node)),
+      (files_vm, lambda node: node.vm_capable),
+      ]
  
-    assert master_node in node_names
-    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
-            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
-           "Found file listed in more than one file list"
+    # Build mapping from filename to list of nodes which should have the file
+    nodefiles = {}
+    for (files, fn) in files2nodefn:
+      if fn is None:
+        filenodes = nodeinfo
+      else:
+        filenodes = filter(fn, nodeinfo)
+      nodefiles.update((filename,
+                        frozenset(map(operator.attrgetter("name"), filenodes)))
+                       for filename in files)
  
-    # Define functions determining which nodes to consider for a file
-    file2nodefn = dict([(filename, fn)
-      for (files, fn) in [(files_all, None),
-                          (files_all_opt, None),
-                          (files_mc, lambda node: (node.master_candidate or
-                                                   node.name == master_node)),
-                          (files_vm, lambda node: node.vm_capable)]
-      for filename in files])
+    assert set(nodefiles) == (files_all | files_mc | files_vm)
  
-    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
+    fileinfo = dict((filename, {}) for filename in nodefiles)
+    ignore_nodes = set()
  
      for node in nodeinfo:
+      if node.offline:
+        ignore_nodes.add(node.name)
+        continue
+
        nresult = all_nvinfo[node.name]
  
        if nresult.fail_msg or not nresult.payload:
@@ -1983,16 +2321,16 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          node_files = nresult.payload.get(constants.NV_FILELIST, None)
  
        test = not (node_files and isinstance(node_files, dict))
-      errorif(test, cls.ENODEFILECHECK, node.name,
+      errorif(test, constants.CV_ENODEFILECHECK, node.name,
                "Node did not return file checksum data")
        if test:
+        ignore_nodes.add(node.name)
          continue
  
+      # Build per-checksum mapping from filename to nodes having it
        for (filename, checksum) in node_files.items():
-        # Check if the file should be considered for a node
-        fn = file2nodefn[filename]
-        if fn is None or fn(node):
-          fileinfo[filename].setdefault(checksum, set()).add(node.name)
+        assert filename in nodefiles
+        fileinfo[filename].setdefault(checksum, set()).add(node.name)
  
      for (filename, checksums) in fileinfo.items():
        assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
@@ -2000,23 +2338,32 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        # Nodes having the file
        with_file = frozenset(node_name
                              for nodes in fileinfo[filename].values()
-                            for node_name in nodes)
+                            for node_name in nodes) - ignore_nodes
+
+      expected_nodes = nodefiles[filename] - ignore_nodes
  
        # Nodes missing file
-      missing_file = node_names - with_file
+      missing_file = expected_nodes - with_file
  
-      if filename in files_all_opt:
+      if filename in files_opt:
          # All or no nodes
-        errorif(missing_file and missing_file != node_names,
-                cls.ECLUSTERFILECHECK, None,
-                "File %s is optional, but it must exist on all or no nodes (not"
-                " found on %s)",
+        errorif(missing_file and missing_file != expected_nodes,
+                constants.CV_ECLUSTERFILECHECK, None,
+                "File %s is optional, but it must exist on all or no"
+                " nodes (not found on %s)",
                  filename, utils.CommaJoin(utils.NiceSort(missing_file)))
        else:
-        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
+        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                  "File %s is missing from node(s) %s", filename,
                  utils.CommaJoin(utils.NiceSort(missing_file)))
  
+        # Warn if a node has a file it shouldn't
+        unexpected = with_file - expected_nodes
+        errorif(unexpected,
+                constants.CV_ECLUSTERFILECHECK, None,
+                "File %s should not exist on node(s) %s",
+                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
+
        # See if there are multiple versions of the file
        test = len(checksums) > 1
        if test:
@@ -2027,7 +2374,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        else:
          variants = []
  
-      errorif(test, cls.ECLUSTERFILECHECK, None,
+      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
                "File %s found with %s different checksums (%s)",
                filename, len(checksums), "; ".join(variants))
  
@@ -2045,27 +2392,27 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      """
      node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      if drbd_helper:
        helper_result = nresult.get(constants.NV_DRBDHELPER, None)
        test = (helper_result == None)
-      _ErrorIf(test, self.ENODEDRBDHELPER, node,
+      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "no drbd usermode helper returned")
        if helper_result:
          status, payload = helper_result
          test = not status
-        _ErrorIf(test, self.ENODEDRBDHELPER, node,
+        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                   "drbd usermode helper check unsuccessful: %s", payload)
          test = status and (payload != drbd_helper)
-        _ErrorIf(test, self.ENODEDRBDHELPER, node,
+        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                   "wrong drbd usermode helper: %s", payload)
  
      # compute the DRBD minors
      node_drbd = {}
      for minor, instance in drbd_map[node].items():
        test = instance not in instanceinfo
-      _ErrorIf(test, self.ECLUSTERCFG, None,
+      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
@@ -2074,12 +2421,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
-        node_drbd[minor] = (instance.name, instance.admin_up)
+        node_drbd[minor] = (instance.name,
+                            instance.admin_state == constants.ADMINST_UP)
  
      # and now check them
      used_minors = nresult.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
-    _ErrorIf(test, self.ENODEDRBD, node,
+    _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if test:
        # we cannot check drbd status
@@ -2087,11 +2435,11 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      for minor, (iname, must_exist) in node_drbd.items():
        test = minor not in used_minors and must_exist
-      _ErrorIf(test, self.ENODEDRBD, node,
+      _ErrorIf(test, constants.CV_ENODEDRBD, node,
                 "drbd minor %d of instance %s is not active", minor, iname)
      for minor in used_minors:
        test = minor not in node_drbd
-      _ErrorIf(test, self.ENODEDRBD, node,
+      _ErrorIf(test, constants.CV_ENODEDRBD, node,
                 "unallocated drbd minor %d is in use", minor)
  
    def _UpdateNodeOS(self, ninfo, nresult, nimg):
@@ -2104,14 +2452,14 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      """
      node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      remote_os = nresult.get(constants.NV_OSLIST, None)
      test = (not isinstance(remote_os, list) or
              not compat.all(isinstance(v, list) and len(v) == 7
                             for v in remote_os))
  
-    _ErrorIf(test, self.ENODEOS, node,
+    _ErrorIf(test, constants.CV_ENODEOS, node,
               "node hasn't returned valid OS data")
  
      nimg.os_fail = test
@@ -2145,7 +2493,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      """
      node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
  
@@ -2153,19 +2501,14 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      for os_name, os_data in nimg.oslist.items():
        assert os_data, "Empty OS status for OS %s?!" % os_name
        f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
-      _ErrorIf(not f_status, self.ENODEOS, node,
+      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
                 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
-      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
+      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
                 "OS '%s' has multiple entries (first one shadows the rest): %s",
                 os_name, utils.CommaJoin([v[0] for v in os_data]))
-      # this will catched in backend too
-      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
-               and not f_var, self.ENODEOS, node,
-               "OS %s with API at least %d does not declare any variant",
-               os_name, constants.OS_API_V15)
        # comparisons with the 'base' image
        test = os_name not in base.oslist
-      _ErrorIf(test, self.ENODEOS, node,
+      _ErrorIf(test, constants.CV_ENODEOS, node,
                 "Extra OS %s not present on reference node (%s)",
                 os_name, base.name)
        if test:
@@ -2179,14 +2522,14 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                           ("variants list", f_var, b_var),
                           ("parameters", beautify_params(f_param),
                            beautify_params(b_param))]:
-        _ErrorIf(a != b, self.ENODEOS, node,
+        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                   "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                   kind, os_name, base.name,
                   utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
  
      # check any missing OSes
      missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
-    _ErrorIf(missing, self.ENODEOS, node,
+    _ErrorIf(missing, constants.CV_ENODEOS, node,
               "OSes present on reference node %s but missing on this node: %s",
               base.name, utils.CommaJoin(missing))
  
@@ -2204,7 +2547,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      if ((ninfo.master_candidate or ninfo.master_capable) and
          constants.NV_OOB_PATHS in nresult):
        for path_result in nresult[constants.NV_OOB_PATHS]:
-        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
+        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
  
    def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
      """Verifies and updates the node volume data.
@@ -2220,17 +2563,18 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      """
      node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      nimg.lvm_fail = True
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        pass
      elif isinstance(lvdata, basestring):
-      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
+      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
      elif not isinstance(lvdata, dict):
-      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
+      _ErrorIf(True, constants.CV_ENODELVM, node,
+               "rpc call to node failed (lvlist)")
      else:
        nimg.volumes = lvdata
        nimg.lvm_fail = False
@@ -2250,8 +2594,9 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      """
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
-    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
-                  " (instancelist): %s", utils.SafeEncode(str(idata)))
+    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
+                  "rpc call to node failed (instancelist): %s",
+                  utils.SafeEncode(str(idata)))
      if test:
        nimg.hyp_fail = True
      else:
@@ -2268,31 +2613,32 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      """
      node = ninfo.name
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      # try to read free memory (from the hypervisor)
      hv_info = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
-    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
+    _ErrorIf(test, constants.CV_ENODEHV, node,
+             "rpc call to node failed (hvinfo)")
      if not test:
        try:
          nimg.mfree = int(hv_info["memory_free"])
        except (ValueError, TypeError):
-        _ErrorIf(True, self.ENODERPC, node,
+        _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid nodeinfo, check hypervisor")
  
      # FIXME: devise a free space model for file based instances as well
      if vg_name is not None:
        test = (constants.NV_VGLIST not in nresult or
                vg_name not in nresult[constants.NV_VGLIST])
-      _ErrorIf(test, self.ENODELVM, node,
+      _ErrorIf(test, constants.CV_ENODELVM, node,
                 "node didn't return data for the volume group '%s'"
                 " - it is either missing or broken", vg_name)
        if not test:
          try:
            nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
          except (ValueError, TypeError):
-          _ErrorIf(True, self.ENODERPC, node,
+          _ErrorIf(True, constants.CV_ENODERPC, node,
                     "node returned invalid LVM info, check LVM status")
  
    def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
@@ -2310,7 +2656,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          list of tuples (success, payload)
  
      """
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
  
      node_disks = {}
      node_disks_devonly = {}
@@ -2359,7 +2705,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          data = len(disks) * [(False, "node offline")]
        else:
          msg = nres.fail_msg
-        _ErrorIf(msg, self.ENODERPC, nname,
+        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                   "while getting disk information: %s", msg)
          if msg:
            # No data from this node
@@ -2392,6 +2738,40 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      return instdisk
  
+  @staticmethod
+  def _SshNodeSelector(group_uuid, all_nodes):
+    """Build one endless name iterator per node group other than our own.
+
+    @param group_uuid: UUID of the group being verified; its nodes are
+      left out
+    @param all_nodes: iterable of node objects; only the C{name}, C{group}
+      and C{offline} attributes are read
+    @return: list of L{itertools.cycle} objects, one per remaining group,
+      each cycling over that group's online node names in sorted order
+
+    """
+    by_group = operator.attrgetter("group")
+
+    # groupby only merges adjacent items, hence the sort on the same key
+    candidates = sorted((node for node in all_nodes
+                         if not (node.group == group_uuid or node.offline)),
+                        key=by_group)
+
+    return [itertools.cycle(sorted(member.name for member in members))
+            for (_, members) in itertools.groupby(candidates, by_group)]
+
+  @classmethod
+  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
+    """Choose which nodes should talk to which other nodes.
+
+    We will make nodes contact all nodes in their group, and one node from
+    every other group. The per-group selectors from L{_SshNodeSelector} are
+    advanced once for every online node of this group.
+
+    @warning: This algorithm has a known issue if one node group is much
+      smaller than others (e.g. just one node). In such a case all other
+      nodes will talk to the single node.
+    @param group_nodes: node objects of the group being verified
+    @param group_uuid: UUID of the group being verified
+    @param all_nodes: node objects of the whole cluster
+    @return: tuple of (sorted names of the group's online nodes, dict
+      mapping each of those names to the sorted names picked from the
+      other groups)
+
+    """
+    own_names = sorted(node.name for node in group_nodes if not node.offline)
+    selectors = cls._SshNodeSelector(group_uuid, all_nodes)
+
+    foreign_peers = {}
+    for own_name in own_names:
+      # One name out of every other group for this node
+      foreign_peers[own_name] = sorted([next(it) for it in selectors])
+
+    return (own_names, foreign_peers)
+
    def BuildHooksEnv(self):
      """Build hooks env.
  
@@ -2412,17 +2792,22 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      """Build hooks nodes.
  
      """
-    assert self.my_node_names, ("Node list not gathered,"
-      " has CheckPrereq been executed?")
      return ([], self.my_node_names)
  
    def Exec(self, feedback_fn):
      """Verify integrity of the node group, performing various test on nodes.
  
      """
-    # This method has too many local variables. pylint: disable-msg=R0914
+    # This method has too many local variables. pylint: disable=R0914
+    feedback_fn("* Verifying group '%s'" % self.group_info.name)
+
+    if not self.my_node_names:
+      # empty node group
+      feedback_fn("* Empty node group, skipping verification")
+      return True
+
      self.bad = False
-    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+    _ErrorIf = self._ErrorIf # pylint: disable=C0103
      verbose = self.op.verbose
      self._feedback_fn = feedback_fn
  
@@ -2435,6 +2820,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      i_non_redundant = [] # Non redundant instances
      i_non_a_balanced = [] # Non auto-balanced instances
+    i_offline = 0 # Count of offline instances
      n_offline = 0 # Count of offline nodes
      n_drained = 0 # Count of nodes being drained
      node_vol_should = {}
@@ -2450,25 +2836,18 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
      feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
  
-    # We will make nodes contact all nodes in their group, and one node from
-    # every other group.
-    # TODO: should it be a *random* node, different every time?
-    online_nodes = [node.name for node in node_data_list if not node.offline]
-    other_group_nodes = {}
-
-    for name in sorted(self.all_node_info):
-      node = self.all_node_info[name]
-      if (node.group not in other_group_nodes
-          and node.group != self.group_uuid
-          and not node.offline):
-        other_group_nodes[node.group] = node.name
+    user_scripts = []
+    if self.cfg.GetUseExternalMipScript():
+      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
  
      node_verify_param = {
        constants.NV_FILELIST:
          utils.UniqueSequence(filename
                               for files in filemap
                               for filename in files),
-      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
+      constants.NV_NODELIST:
+        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
+                                  self.all_node_info.values()),
        constants.NV_HYPERVISOR: hypervisors,
        constants.NV_HVPARAMS:
          _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
@@ -2483,6 +2862,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        constants.NV_MASTERIP: (master_node, master_ip),
        constants.NV_OSLIST: None,
        constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
+      constants.NV_USERSCRIPTS: user_scripts,
        }
  
      if vg_name is not None:
@@ -2557,6 +2937,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
      all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                             node_verify_param,
                                             self.cfg.GetClusterName())
+    nvinfo_endtime = time.time()
+
      if self.extra_lv_nodes and vg_name is not None:
        extra_lv_nvinfo = \
            self.rpc.call_node_verify(self.extra_lv_nodes,
@@ -2564,7 +2946,6 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                                      self.cfg.GetClusterName())
      else:
        extra_lv_nvinfo = {}
-    nvinfo_endtime = time.time()
  
      all_drbd_map = self.cfg.ComputeDRBDMap()
  
@@ -2629,7 +3010,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          feedback_fn("* Verifying node %s (%s)" % (node, ntype))
  
        msg = all_nvinfo[node].fail_msg
-      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
+      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
+               msg)
        if msg:
          nimg.rpc_fail = True
          continue
@@ -2639,6 +3021,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        nimg.call_ok = self._VerifyNode(node_i, nresult)
        self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
        self._VerifyNodeNetwork(node_i, nresult)
+      self._VerifyNodeUserScripts(node_i, nresult)
        self._VerifyOob(node_i, nresult)
  
        if nimg.vm_capable:
@@ -2663,10 +3046,16 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          non_primary_inst = set(nimg.instances).difference(nimg.pinst)
  
          for inst in non_primary_inst:
+          # FIXME: investigate best way to handle offline insts
+          # "inst" is an instance name here (it is looked up in
+          # self.all_inst_info and reported as such below), so the
+          # configuration object has to be fetched before its admin state
+          # can be inspected; unknown instances fall through to the orphan
+          # check
+          inst_obj = self.all_inst_info.get(inst)
+          if (inst_obj is not None and
+              inst_obj.admin_state == constants.ADMINST_OFFLINE):
+            if verbose:
+              feedback_fn("* Skipping offline instance %s" % inst)
+            i_offline += 1
+            continue
            test = inst in self.all_inst_info
-          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
+          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                     "instance should not run on node %s", node_i.name)
-          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
+          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                     "node is running unknown instance %s", inst)
  
      for node, result in extra_lv_nvinfo.items():
@@ -2685,11 +3074,12 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        pnode = inst_config.primary_node
        pnode_img = node_image[pnode]
        _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
-               self.ENODERPC, pnode, "instance %s, connection to"
+               constants.CV_ENODERPC, pnode, "instance %s, connection to"
                 " primary node failed", instance)
  
-      _ErrorIf(inst_config.admin_up and pnode_img.offline,
-               self.EINSTANCEBADNODE, instance,
+      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
+               pnode_img.offline,
+               constants.CV_EINSTANCEBADNODE, instance,
                 "instance is marked as running and lives on offline node %s",
                 inst_config.primary_node)
  
@@ -2701,7 +3091,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        if not inst_config.secondary_nodes:
          i_non_redundant.append(instance)
  
-      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
+      _ErrorIf(len(inst_config.secondary_nodes) > 1,
+               constants.CV_EINSTANCELAYOUT,
                 instance, "instance has multiple secondary nodes: %s",
                 utils.CommaJoin(inst_config.secondary_nodes),
                 code=self.ETYPE_WARNING)
@@ -2722,7 +3113,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                                       key=lambda (_, nodes): pnode in nodes,
                                       reverse=True)]
  
-        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
+        self._ErrorIf(len(instance_groups) > 1,
+                      constants.CV_EINSTANCESPLITGROUPS,
                        instance, "instance has primary and secondary nodes in"
                        " different groups: %s", utils.CommaJoin(pretty_list),
                        code=self.ETYPE_WARNING)
@@ -2732,21 +3124,22 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  
        for snode in inst_config.secondary_nodes:
          s_img = node_image[snode]
-        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
-                 "instance %s, connection to secondary node failed", instance)
+        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
+                 snode, "instance %s, connection to secondary node failed",
+                 instance)
  
          if s_img.offline:
            inst_nodes_offline.append(snode)
  
        # warn that the instance lives on offline nodes
-      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
+      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
                 "instance has offline secondary node(s) %s",
                 utils.CommaJoin(inst_nodes_offline))
        # ... or ghost/non-vm_capable nodes
        for node in inst_config.all_nodes:
-        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
-                 "instance lives on ghost node %s", node)
-        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
+        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
+                 instance, "instance lives on ghost node %s", node)
+        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                   instance, "instance lives on non-vm_capable node %s", node)
  
      feedback_fn("* Verifying orphan volumes")
@@ -2777,6 +3170,9 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                    % len(i_non_a_balanced))
  
+    if i_offline:
+      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
+
      if n_offline:
        feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
  
@@ -2800,9 +3196,12 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          and hook results
  
      """
-    # We only really run POST phase hooks, and are only interested in
-    # their results
-    if phase == constants.HOOKS_PHASE_POST:
+    # We only really run POST phase hooks, only for non-empty groups,
+    # and are only interested in their results
+    if not self.my_node_names:
+      # empty node group
+      pass
+    elif phase == constants.HOOKS_PHASE_POST:
        # Used to change hooks' output to proper indentation
        feedback_fn("* Hooks Results")
        assert hooks_results, "invalid result from hooks"
@@ -2811,24 +3210,22 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
          res = hooks_results[node_name]
          msg = res.fail_msg
          test = msg and not res.offline
-        self._ErrorIf(test, self.ENODEHOOKS, node_name,
+        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Communication failure in hooks execution: %s", msg)
          if res.offline or msg:
-          # No need to investigate payload if node is offline or gave an error.
-          # override manually lu_result here as _ErrorIf only
-          # overrides self.bad
-          lu_result = 1
+          # No need to investigate payload if node is offline or gave
+          # an error.
            continue
          for script, hkr, output in res.payload:
            test = hkr == constants.HKR_FAIL
-          self._ErrorIf(test, self.ENODEHOOKS, node_name,
+          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                          "Script %s failed, output:", script)
            if test:
-            output = self._HOOKS_INDENT_RE.sub('      ', output)
+            output = self._HOOKS_INDENT_RE.sub("      ", output)
              feedback_fn("%s" % output)
-            lu_result = 0
+            lu_result = False
  
-      return lu_result
+    return lu_result
  
  
  class LUClusterVerifyDisks(NoHooksLU):
@@ -2838,11 +3235,91 @@ class LUClusterVerifyDisks(NoHooksLU):
    REQ_BGL = False
  
    def ExpandNames(self):
+    self.share_locks = _ShareAll()
      self.needed_locks = {
-      locking.LEVEL_NODE: locking.ALL_SET,
-      locking.LEVEL_INSTANCE: locking.ALL_SET,
-    }
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+      locking.LEVEL_NODEGROUP: locking.ALL_SET,
+      }
+
+  def Exec(self, feedback_fn):
+    """Hand the disk verification over to one job per owned node group.
+
+    @param feedback_fn: unused here
+    @return: L{ResultWithJobs} holding one single-opcode job
+      (L{opcodes.OpGroupVerifyDisks}) for every owned node group lock
+
+    """
+    jobs = []
+    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
+      # Each job consists of exactly one opcode
+      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])
+
+    return ResultWithJobs(jobs)
+
+
+class LUGroupVerifyDisks(NoHooksLU):
+  """Verifies the status of all disks in a node group.
+
+  """
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    """Resolve the group name and declare the lock levels this LU uses.
+
+    All three lock lists start out empty; L{DeclareLocks} fills them in.
+
+    """
+    # Raises errors.OpPrereqError on its own if group can't be found
+    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
+
+    self.share_locks = _ShareAll()
+
+    lock_levels = [
+      locking.LEVEL_INSTANCE,
+      locking.LEVEL_NODEGROUP,
+      locking.LEVEL_NODE,
+      ]
+    # A separate list per level, DeclareLocks modifies them in place
+    self.needed_locks = dict((level, []) for level in lock_levels)
+
+  def DeclareLocks(self, level):
+    """Fill in the locks needed at C{level}.
+
+    The instance and node group lists are computed from the configuration
+    before the corresponding node locks are held, so they are only a best
+    guess; L{CheckPrereq} re-checks them.
+    NOTE(review): presumably invoked once per level, instances first, then
+    node groups, then nodes -- confirm against the caller.
+
+    @param level: the locking level to declare locks for; levels other
+      than the three handled below are ignored
+
+    """
+    if level == locking.LEVEL_INSTANCE:
+      assert not self.needed_locks[locking.LEVEL_INSTANCE]
+
+      # Lock instances optimistically, needs verification once node and group
+      # locks have been acquired
+      self.needed_locks[locking.LEVEL_INSTANCE] = \
+        self.cfg.GetNodeGroupInstances(self.group_uuid)
+
+    elif level == locking.LEVEL_NODEGROUP:
+      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
+
+      # The group to be verified itself plus every group that one of the
+      # already locked instances has a node in
+      self.needed_locks[locking.LEVEL_NODEGROUP] = \
+        set([self.group_uuid] +
+            # Lock all groups used by instances optimistically; this requires
+            # going via the node before it's locked, requiring verification
+            # later on
+            [group_uuid
+             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
+             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
+
+    elif level == locking.LEVEL_NODE:
+      # This will only lock the nodes in the group to be verified which contain
+      # actual instances
+      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+      self._LockInstancesNodes()
+
+      # Lock all nodes in group to be verified
+      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
+      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
+      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
+
+  def CheckPrereq(self):
+    """Re-validate the optimistically acquired locks and load the instances.
+
+    Stores the configuration objects of all locked instances, keyed by
+    instance name, in C{self.instances}.
+
+    """
+    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
+
+    assert self.group_uuid in locked_groups
+
+    # The instance locks were taken optimistically, make sure they still
+    # match the group's instances
+    _CheckNodeGroupInstances(self.cfg, self.group_uuid, locked_instances)
+
+    self.instances = dict(self.cfg.GetMultiInstanceInfo(locked_instances))
+
+    # Same for the node and node group locks of every single instance
+    for (name, instance) in self.instances.items():
+      assert locked_nodes.issuperset(instance.all_nodes), \
+        "Instance %s's nodes changed while we kept the lock" % name
+
+      groups = _CheckInstanceNodeGroups(self.cfg, name, locked_groups)
+
+      assert self.group_uuid in groups, \
+        "Instance %s has no node in group %s" % (name, self.group_uuid)
  
    def Exec(self, feedback_fn):
      """Verify integrity of cluster disks.
@@ -2853,50 +3330,41 @@ class LUClusterVerifyDisks(NoHooksLU):
          missing volumes
  
      """
-    result = res_nodes, res_instances, res_missing = {}, [], {}
+    res_nodes = {}
+    res_instances = set()
+    res_missing = {}
  
-    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
-    instances = self.cfg.GetAllInstancesInfo().values()
+    nv_dict = _MapInstanceDisksToNodes([inst
+            for inst in self.instances.values()
+            if inst.admin_state == constants.ADMINST_UP])
  
-    nv_dict = {}
-    for inst in instances:
-      inst_lvs = {}
-      if not inst.admin_up:
-        continue
-      inst.MapLVsByNode(inst_lvs)
-      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
-      for node, vol_list in inst_lvs.iteritems():
-        for vol in vol_list:
-          nv_dict[(node, vol)] = inst
-
-    if not nv_dict:
-      return result
-
-    node_lvs = self.rpc.call_lv_list(nodes, [])
-    for node, node_res in node_lvs.items():
-      if node_res.offline:
-        continue
-      msg = node_res.fail_msg
-      if msg:
-        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
-        res_nodes[node] = msg
-        continue
+    if nv_dict:
+      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
+                             set(self.cfg.GetVmCapableNodeList()))
  
-      lvs = node_res.payload
-      for lv_name, (_, _, lv_online) in lvs.items():
-        inst = nv_dict.pop((node, lv_name), None)
-        if (not lv_online and inst is not None
-            and inst.name not in res_instances):
-          res_instances.append(inst.name)
+      node_lvs = self.rpc.call_lv_list(nodes, [])
  
-    # any leftover items in nv_dict are missing LVs, let's arrange the
-    # data better
-    for key, inst in nv_dict.iteritems():
-      if inst.name not in res_missing:
-        res_missing[inst.name] = []
-      res_missing[inst.name].append(key)
+      for (node, node_res) in node_lvs.items():
+        if node_res.offline:
+          continue
  
-    return result
+        msg = node_res.fail_msg
+        if msg:
+          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
+          res_nodes[node] = msg
+          continue
+
+        for lv_name, (_, _, lv_online) in node_res.payload.items():
+          inst = nv_dict.pop((node, lv_name), None)
+          if not (lv_online or inst is None):
+            res_instances.add(inst)
+
+      # any leftover items in nv_dict are missing LVs, let's arrange the data
+      # better
+      for key, inst in nv_dict.iteritems():
+        res_missing.setdefault(inst, []).append(list(key))
+
+    return (res_nodes, list(res_instances), res_missing)
  
  
  class LUClusterRepairDiskSizes(NoHooksLU):
@@ -2909,21 +3377,24 @@ class LUClusterRepairDiskSizes(NoHooksLU):
      if self.op.instances:
        self.wanted_names = _GetWantedInstances(self, self.op.instances)
        self.needed_locks = {
-        locking.LEVEL_NODE: [],
+        locking.LEVEL_NODE_RES: [],
          locking.LEVEL_INSTANCE: self.wanted_names,
          }
-      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
      else:
        self.wanted_names = None
        self.needed_locks = {
-        locking.LEVEL_NODE: locking.ALL_SET,
+        locking.LEVEL_NODE_RES: locking.ALL_SET,
          locking.LEVEL_INSTANCE: locking.ALL_SET,
          }
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+    self.share_locks = {
+      locking.LEVEL_NODE_RES: 1,
+      locking.LEVEL_INSTANCE: 0,
+      }
  
    def DeclareLocks(self, level):
-    if level == locking.LEVEL_NODE and self.wanted_names is not None:
-      self._LockInstancesNodes(primary_only=True)
+    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
+      self._LockInstancesNodes(primary_only=True, level=level)
  
    def CheckPrereq(self):
      """Check prerequisites.
@@ -2932,10 +3403,10 @@ class LUClusterRepairDiskSizes(NoHooksLU):
  
      """
      if self.wanted_names is None:
-      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
+      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
  
-    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
-                             in self.wanted_names]
+    self.wanted_instances = \
+        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
  
    def _EnsureChildSizes(self, disk):
      """Ensure children of the disk have the needed disk size.
@@ -2974,6 +3445,11 @@ class LUClusterRepairDiskSizes(NoHooksLU):
        for idx, disk in enumerate(instance.disks):
          per_node_disks[pnode].append((instance, idx, disk))
  
+    assert not (frozenset(per_node_disks.keys()) -
+                self.owned_locks(locking.LEVEL_NODE_RES)), \
+      "Not owning correct locks"
+    assert not self.owned_locks(locking.LEVEL_NODE)
+
      changed = []
      for node, dskl in per_node_disks.items():
        newl = [v[2].Copy() for v in dskl]
@@ -3063,29 +3539,33 @@ class LUClusterRename(LogicalUnit):
  
      """
      clustername = self.op.name
-    ip = self.ip
+    new_ip = self.ip
  
      # shutdown the master IP
-    master = self.cfg.GetMasterNode()
-    result = self.rpc.call_node_stop_master(master, False)
+    master_params = self.cfg.GetMasterNetworkParameters()
+    ems = self.cfg.GetUseExternalMipScript()
+    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+                                                     master_params, ems)
      result.Raise("Could not disable the master role")
  
      try:
        cluster = self.cfg.GetClusterInfo()
        cluster.cluster_name = clustername
-      cluster.master_ip = ip
+      cluster.master_ip = new_ip
        self.cfg.Update(cluster, feedback_fn)
  
        # update the known hosts file
        ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
        node_list = self.cfg.GetOnlineNodeList()
        try:
-        node_list.remove(master)
+        node_list.remove(master_params.name)
        except ValueError:
          pass
        _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
      finally:
-      result = self.rpc.call_node_start_master(master, False, False)
+      master_params.ip = new_ip
+      result = self.rpc.call_node_activate_master_ip(master_params.name,
+                                                     master_params, ems)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not re-enable the master role on"
@@ -3094,8 +3574,29 @@ class LUClusterRename(LogicalUnit):
      return clustername
  
  
-class LUClusterSetParams(LogicalUnit):
-  """Change the parameters of the cluster.
+def _ValidateNetmask(cfg, netmask):
+  """Checks if a netmask is valid.
+
+  The netmask is validated by the IP address class matching the cluster's
+  primary IP family.
+
+  @type cfg: L{config.ConfigWriter}
+  @param cfg: The cluster configuration
+  @type netmask: int
+  @param netmask: the netmask to be verified
+  @raise errors.OpPrereqError: if the validation fails
+
+  """
+  ip_family = cfg.GetPrimaryIPFamily()
+  try:
+    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
+  except errors.ProgrammerError:
+    # Pass an error code like the other prerequisite errors in this module
+    raise errors.OpPrereqError("Invalid primary ip family: %s." %
+                               ip_family, errors.ECODE_INVAL)
+  if not ipcls.ValidateNetmask(netmask):
+    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
+                               netmask, errors.ECODE_INVAL)
+
+
+class LUClusterSetParams(LogicalUnit):
+  """Change the parameters of the cluster.
  
    """
    HPATH = "cluster-modify"
@@ -3115,6 +3616,13 @@ class LUClusterSetParams(LogicalUnit):
      if self.op.remove_uids:
        uidpool.CheckUidPool(self.op.remove_uids)
  
+    if self.op.master_netmask is not None:
+      _ValidateNetmask(self.cfg, self.op.master_netmask)
+
+    if self.op.diskparams:
+      for dt_params in self.op.diskparams.values():
+        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
+
    def ExpandNames(self):
      # FIXME: in the future maybe other cluster params won't require checking on
      # all nodes to be modified.
@@ -3157,7 +3665,7 @@ class LUClusterSetParams(LogicalUnit):
                                     " drbd-based instances exist",
                                     errors.ECODE_INVAL)
  
-    node_list = self.glm.list_owned(locking.LEVEL_NODE)
+    node_list = self.owned_locks(locking.LEVEL_NODE)
  
      # if vg_name not None, checks given volume group on all nodes
      if self.op.vg_name:
@@ -3179,8 +3687,7 @@ class LUClusterSetParams(LogicalUnit):
      if self.op.drbd_helper:
        # checks given drbd helper on all nodes
        helpers = self.rpc.call_drbd_helper(node_list)
-      for node in node_list:
-        ninfo = self.cfg.GetNodeInfo(node)
+      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
          if ninfo.offline:
            self.LogInfo("Not checking drbd helper on offline node %s", node)
            continue
@@ -3197,6 +3704,7 @@ class LUClusterSetParams(LogicalUnit):
      self.cluster = cluster = self.cfg.GetClusterInfo()
      # validate params changes
      if self.op.beparams:
+      objects.UpgradeBeParams(self.op.beparams)
        utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
        self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
  
@@ -3210,6 +3718,29 @@ class LUClusterSetParams(LogicalUnit):
          self.new_ndparams["oob_program"] = \
              constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
  
+    if self.op.hv_state:
+      new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
+                                            self.cluster.hv_state_static)
+      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
+                               for hv, values in new_hv_state.items())
+
+    if self.op.disk_state:
+      new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
+                                                self.cluster.disk_state_static)
+      self.new_disk_state = \
+        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
+                            for name, values in svalues.items()))
+             for storage, svalues in new_disk_state.items())
+
+    if self.op.ipolicy:
+      ipolicy = {}
+      for key, value in self.op.ipolicy.items():
+        utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
+        ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
+                                          value)
+      objects.InstancePolicy.CheckParameterSyntax(ipolicy)
+      self.new_ipolicy = ipolicy
+
      if self.op.nicparams:
        utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
        self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
@@ -3247,6 +3778,15 @@ class LUClusterSetParams(LogicalUnit):
          else:
            self.new_hvparams[hv_name].update(hv_dict)
  
+    # disk template parameters: start from the cluster's current values and
+    # overlay whatever the opcode passes in
+    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
+    if self.op.diskparams:
+      for dt_name, dt_params in self.op.diskparams.items():
+        # The membership test has to look at the merged dict; every dt_name
+        # is by construction a key of self.op.diskparams, so testing that
+        # would never insert and update() would fail with KeyError for a
+        # template without existing parameters
+        if dt_name not in self.new_diskparams:
+          self.new_diskparams[dt_name] = dt_params
+        else:
+          self.new_diskparams[dt_name].update(dt_params)
+
      # os hypervisor parameters
      self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
      if self.op.os_hvp:
@@ -3361,10 +3901,18 @@ class LUClusterSetParams(LogicalUnit):
        self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
      if self.op.nicparams:
        self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
+    if self.op.ipolicy:
+      self.cluster.ipolicy = self.new_ipolicy
      if self.op.osparams:
        self.cluster.osparams = self.new_osp
      if self.op.ndparams:
        self.cluster.ndparams = self.new_ndparams
+    if self.op.diskparams:
+      self.cluster.diskparams = self.new_diskparams
+    if self.op.hv_state:
+      self.cluster.hv_state_static = self.new_hv_state
+    if self.op.disk_state:
+      self.cluster.disk_state_static = self.new_disk_state
  
      if self.op.candidate_pool_size is not None:
        self.cluster.candidate_pool_size = self.op.candidate_pool_size
@@ -3372,6 +3920,9 @@ class LUClusterSetParams(LogicalUnit):
        _AdjustCandidatePool(self, [])
  
      if self.op.maintain_node_health is not None:
+      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
+        feedback_fn("Note: CONFD was disabled at build time, node health"
+                    " maintenance is not useful (still enabling it)")
        self.cluster.maintain_node_health = self.op.maintain_node_health
  
      if self.op.prealloc_wipe_disks is not None:
@@ -3392,6 +3943,9 @@ class LUClusterSetParams(LogicalUnit):
      if self.op.reserved_lvs is not None:
        self.cluster.reserved_lvs = self.op.reserved_lvs
  
+    if self.op.use_external_mip_script is not None:
+      self.cluster.use_external_mip_script = self.op.use_external_mip_script
+
      def helper_os(aname, mods, desc):
        desc += " OS list"
        lst = getattr(self.cluster, aname)
@@ -3416,21 +3970,40 @@ class LUClusterSetParams(LogicalUnit):
        helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
  
      if self.op.master_netdev:
-      master = self.cfg.GetMasterNode()
+      master_params = self.cfg.GetMasterNetworkParameters()
+      ems = self.cfg.GetUseExternalMipScript()
        feedback_fn("Shutting down master ip on the current netdev (%s)" %
                    self.cluster.master_netdev)
-      result = self.rpc.call_node_stop_master(master, False)
+      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+                                                       master_params, ems)
        result.Raise("Could not disable the master ip")
        feedback_fn("Changing master_netdev from %s to %s" %
-                  (self.cluster.master_netdev, self.op.master_netdev))
+                  (master_params.netdev, self.op.master_netdev))
        self.cluster.master_netdev = self.op.master_netdev
  
+    if self.op.master_netmask:
+      master_params = self.cfg.GetMasterNetworkParameters()
+      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
+      result = self.rpc.call_node_change_master_netmask(master_params.name,
+                                                        master_params.netmask,
+                                                        self.op.master_netmask,
+                                                        master_params.ip,
+                                                        master_params.netdev)
+      if result.fail_msg:
+        msg = "Could not change the master IP netmask: %s" % result.fail_msg
+        feedback_fn(msg)
+
+      self.cluster.master_netmask = self.op.master_netmask
+
      self.cfg.Update(self.cluster, feedback_fn)
  
      if self.op.master_netdev:
+      master_params = self.cfg.GetMasterNetworkParameters()
        feedback_fn("Starting the master ip on the new master netdev (%s)" %
                    self.op.master_netdev)
-      result = self.rpc.call_node_start_master(master, False, False)
+      ems = self.cfg.GetUseExternalMipScript()
+      result = self.rpc.call_node_activate_master_ip(master_params.name,
+                                                     master_params, ems)
        if result.fail_msg:
          self.LogWarning("Could not re-enable the master ip on"
                          " the master, please restart manually: %s",
@@ -3463,36 +4036,58 @@ def _ComputeAncillaryFiles(cluster, redist):
      constants.SSH_KNOWN_HOSTS_FILE,
      constants.CONFD_HMAC_KEY,
      constants.CLUSTER_DOMAIN_SECRET_FILE,
+    constants.SPICE_CERT_FILE,
+    constants.SPICE_CACERT_FILE,
+    constants.RAPI_USERS_FILE,
      ])
  
    if not redist:
      files_all.update(constants.ALL_CERT_FILES)
      files_all.update(ssconf.SimpleStore().GetFileList())
+  else:
+    # we need to ship at least the RAPI certificate
+    files_all.add(constants.RAPI_CERT_FILE)
  
    if cluster.modify_etc_hosts:
      files_all.add(constants.ETC_HOSTS)
  
-  # Files which must either exist on all nodes or on none
-  files_all_opt = set([
+  # Files which are optional, these must:
+  # - be present in one other category as well
+  # - either exist or not exist on all nodes of that category (mc, vm all)
+  files_opt = set([
      constants.RAPI_USERS_FILE,
      ])
  
    # Files which should only be on master candidates
    files_mc = set()
+
    if not redist:
      files_mc.add(constants.CLUSTER_CONF_FILE)
  
+    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
+    # replication
+    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
+
    # Files which should only be on VM-capable nodes
    files_vm = set(filename
      for hv_name in cluster.enabled_hypervisors
-    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
+    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
+
+  files_opt |= set(filename
+    for hv_name in cluster.enabled_hypervisors
+    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
  
-  # Filenames must be unique
-  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
-          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
+  # Filenames in each category must be unique
+  all_files_set = files_all | files_mc | files_vm
+  assert (len(all_files_set) ==
+          sum(map(len, [files_all, files_mc, files_vm]))), \
           "Found file listed in more than one file list"
  
-  return (files_all, files_all_opt, files_mc, files_vm)
+  # Optional files must be present in one other category
+  assert all_files_set.issuperset(files_opt), \
+         "Optional file not in a different required list"
+
+  return (files_all, files_opt, files_mc, files_vm)
  
  
  def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
@@ -3526,7 +4121,7 @@ def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
        nodelist.remove(master_info.name)
  
    # Gather file lists
-  (files_all, files_all_opt, files_mc, files_vm) = \
+  (files_all, _, files_mc, files_vm) = \
      _ComputeAncillaryFiles(cluster, True)
  
    # Never re-distribute configuration file from here
@@ -3536,7 +4131,6 @@ def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  
    filemap = [
      (online_nodes, files_all),
-    (online_nodes, files_all_opt),
      (vm_nodes, files_vm),
      ]
  
@@ -3568,6 +4162,36 @@ class LUClusterRedistConf(NoHooksLU):
      _RedistributeAncillaryFiles(self)
  
  
+class LUClusterActivateMasterIp(NoHooksLU):
+  """Logical unit bringing up the master IP on the master node.
+
+  """
+  def Exec(self, feedback_fn):
+    """Request activation of the master IP.
+
+    The master network parameters and the "use external master IP script"
+    setting are read from the configuration and handed to the activation
+    RPC; C{Raise} is then called on the RPC result.
+
+    @param feedback_fn: not used by this logical unit
+
+    """
+    cfg = self.cfg
+    net_params = cfg.GetMasterNetworkParameters()
+    use_ext_script = cfg.GetUseExternalMipScript()
+    activation = self.rpc.call_node_activate_master_ip(net_params.name,
+                                                       net_params,
+                                                       use_ext_script)
+    activation.Raise("Could not activate the master IP")
+
+
+class LUClusterDeactivateMasterIp(NoHooksLU):
+  """Logical unit taking down the master IP on the master node.
+
+  """
+  def Exec(self, feedback_fn):
+    """Request deactivation of the master IP.
+
+    The master network parameters and the "use external master IP script"
+    setting are read from the configuration and handed to the deactivation
+    RPC; C{Raise} is then called on the RPC result.
+
+    @param feedback_fn: not used by this logical unit
+
+    """
+    cfg = self.cfg
+    net_params = cfg.GetMasterNetworkParameters()
+    use_ext_script = cfg.GetUseExternalMipScript()
+    deactivation = self.rpc.call_node_deactivate_master_ip(net_params.name,
+                                                           net_params,
+                                                           use_ext_script)
+    deactivation.Raise("Could not deactivate the master IP")
+
+
  def _WaitForSync(lu, instance, disks=None, oneshot=False):
    """Sleep and poll for an instance's disk to sync.
  
@@ -3739,9 +4363,7 @@ class LUOobCommand(NoHooksLU):
      if self.op.command in self._SKIP_MASTER:
        assert self.master_node not in self.op.node_names
  
-    for node_name in self.op.node_names:
-      node = self.cfg.GetNodeInfo(node_name)
-
+    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
        if node is None:
          raise errors.OpPrereqError("Node %s not found" % node_name,
                                     errors.ECODE_NOENT)
@@ -3858,6 +4480,7 @@ class LUOobCommand(NoHooksLU):
        raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                                 utils.CommaJoin(errs))
  
+
  class _OsQuery(_QueryBase):
    FIELDS = query.OS_FIELDS
  
@@ -4065,15 +4688,12 @@ class LUNodeRemove(LogicalUnit):
      node = self.cfg.GetNodeInfo(self.op.node_name)
      assert node is not None
  
-    instance_list = self.cfg.GetInstanceList()
-
      masternode = self.cfg.GetMasterNode()
      if node.name == masternode:
        raise errors.OpPrereqError("Node is the master node, failover to another"
                                   " node is required", errors.ECODE_INVAL)
  
-    for instance_name in instance_list:
-      instance = self.cfg.GetInstanceInfo(instance_name)
+    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
        if node.name in instance.all_nodes:
          raise errors.OpPrereqError("Instance %s is still running on the node,"
                                     " please remove first" % instance_name,
@@ -4091,6 +4711,9 @@ class LUNodeRemove(LogicalUnit):
  
      modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
  
+    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
+      "Not owning BGL"
+
      # Promote nodes to master candidate as needed
      _AdjustCandidatePool(self, exceptions=[node.name])
      self.context.RemoveNode(node.name)
@@ -4119,7 +4742,7 @@ class _NodeQuery(_QueryBase):
  
    def ExpandNames(self, lu):
      lu.needed_locks = {}
-    lu.share_locks[locking.LEVEL_NODE] = 1
+    lu.share_locks = _ShareAll()
  
      if self.names:
        self.wanted = _GetWantedNodes(lu, self.names)
@@ -4130,7 +4753,7 @@ class _NodeQuery(_QueryBase):
                         query.NQ_LIVE in self.requested_data)
  
      if self.do_locking:
-      # if we don't request only static fields, we need to lock the nodes
+      # If any non-static field is requested we need to lock the nodes
        lu.needed_locks[locking.LEVEL_NODE] = self.wanted
  
    def DeclareLocks(self, lu, level):
@@ -4149,9 +4772,9 @@ class _NodeQuery(_QueryBase):
        # filter out non-vm_capable nodes
        toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
  
-      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
-                                        lu.cfg.GetHypervisorType())
-      live_data = dict((name, nresult.payload)
+      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
+                                        [lu.cfg.GetHypervisorType()])
+      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                         for (name, nresult) in node_data.items()
                         if not nresult.fail_msg and nresult.payload)
      else:
@@ -4194,7 +4817,7 @@ class LUNodeQuery(NoHooksLU):
    """Logical unit for querying nodes.
  
    """
-  # pylint: disable-msg=W0142
+  # pylint: disable=W0142
    REQ_BGL = False
  
    def CheckArguments(self):
@@ -4204,6 +4827,9 @@ class LUNodeQuery(NoHooksLU):
    def ExpandNames(self):
      self.nq.ExpandNames(self)
  
+  def DeclareLocks(self, level):
+    """Forward the lock declaration for C{level} to C{self.nq}.
+
+    @param level: locking level handed on unchanged to C{self.nq.DeclareLocks}
+
+    """
+    self.nq.DeclareLocks(self, level)
+
    def Exec(self, feedback_fn):
      return self.nq.OldStyleQuery(self)
  
@@ -4222,8 +4848,9 @@ class LUNodeQueryvols(NoHooksLU):
                         selected=self.op.output_fields)
  
    def ExpandNames(self):
+    self.share_locks = _ShareAll()
      self.needed_locks = {}
-    self.share_locks[locking.LEVEL_NODE] = 1
+
      if not self.op.nodes:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      else:
@@ -4234,13 +4861,11 @@ class LUNodeQueryvols(NoHooksLU):
      """Computes the list of nodes and their attributes.
  
      """
-    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
+    nodenames = self.owned_locks(locking.LEVEL_NODE)
      volumes = self.rpc.call_node_volumes(nodenames)
  
-    ilist = [self.cfg.GetInstanceInfo(iname) for iname
-             in self.cfg.GetInstanceList()]
-
-    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
+    ilist = self.cfg.GetAllInstancesInfo()
+    vol2inst = _MapInstanceDisksToNodes(ilist.values())
  
      output = []
      for node in nodenames:
@@ -4252,8 +4877,8 @@ class LUNodeQueryvols(NoHooksLU):
          self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
          continue
  
-      node_vols = nresult.payload[:]
-      node_vols.sort(key=lambda vol: vol['dev'])
+      node_vols = sorted(nresult.payload,
+                         key=operator.itemgetter("dev"))
  
        for vol in node_vols:
          node_output = []
@@ -4261,22 +4886,15 @@ class LUNodeQueryvols(NoHooksLU):
            if field == "node":
              val = node
            elif field == "phys":
-            val = vol['dev']
+            val = vol["dev"]
            elif field == "vg":
-            val = vol['vg']
+            val = vol["vg"]
            elif field == "name":
-            val = vol['name']
+            val = vol["name"]
            elif field == "size":
-            val = int(float(vol['size']))
+            val = int(float(vol["size"]))
            elif field == "instance":
-            for inst in ilist:
-              if node not in lv_by_node[inst]:
-                continue
-              if vol['name'] in lv_by_node[inst][node]:
-                val = inst.name
-                break
-            else:
-              val = '-'
+            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
            else:
              raise errors.ParameterError(field)
            node_output.append(str(val))
@@ -4299,8 +4917,8 @@ class LUNodeQueryStorage(NoHooksLU):
                         selected=self.op.output_fields)
  
    def ExpandNames(self):
+    self.share_locks = _ShareAll()
      self.needed_locks = {}
-    self.share_locks[locking.LEVEL_NODE] = 1
  
      if self.op.nodes:
        self.needed_locks[locking.LEVEL_NODE] = \
@@ -4312,7 +4930,7 @@ class LUNodeQueryStorage(NoHooksLU):
      """Computes the list of nodes and their attributes.
  
      """
-    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
+    self.nodes = self.owned_locks(locking.LEVEL_NODE)
  
      # Always get name to sort by
      if constants.SF_NAME in self.op.output_fields:
@@ -4374,8 +4992,7 @@ class _InstanceQuery(_QueryBase):
  
    def ExpandNames(self, lu):
      lu.needed_locks = {}
-    lu.share_locks[locking.LEVEL_INSTANCE] = 1
-    lu.share_locks[locking.LEVEL_NODE] = 1
+    lu.share_locks = _ShareAll()
  
      if self.names:
        self.wanted = _GetWantedInstances(lu, self.names)
@@ -4386,17 +5003,43 @@ class _InstanceQuery(_QueryBase):
                         query.IQ_LIVE in self.requested_data)
      if self.do_locking:
        lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
+      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
        lu.needed_locks[locking.LEVEL_NODE] = []
        lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  
+    self.do_grouplocks = (self.do_locking and
+                          query.IQ_NODES in self.requested_data)
+
    def DeclareLocks(self, lu, level):
-    if level == locking.LEVEL_NODE and self.do_locking:
-      lu._LockInstancesNodes() # pylint: disable-msg=W0212
+    if self.do_locking:
+      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
+        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
+
+        # Lock all groups used by instances optimistically; this requires going
+        # via the node before it's locked, requiring verification later on
+        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
+          set(group_uuid
+              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
+              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
+      elif level == locking.LEVEL_NODE:
+        lu._LockInstancesNodes() # pylint: disable=W0212
+
+  @staticmethod
+  def _CheckGroupLocks(lu):
+    """Check the node groups of every locked instance against owned groups.
+
+    @param lu: logical unit whose C{owned_locks} and C{cfg} are consulted
+
+    """
+    held_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
+    held_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
+
+    # Make sure the node groups of the locked instances are still correct
+    for name in held_instances:
+      _CheckInstanceNodeGroups(lu.cfg, name, held_groups)
  
    def _GetQueryData(self, lu):
      """Computes the list of instances and their attributes.
  
      """
+    if self.do_grouplocks:
+      self._CheckGroupLocks(lu)
+
      cluster = lu.cfg.GetClusterInfo()
      all_info = lu.cfg.GetAllInstancesInfo()
  
@@ -4459,22 +5102,34 @@ class _InstanceQuery(_QueryBase):
      else:
        consinfo = None
  
+    if query.IQ_NODES in self.requested_data:
+      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
+                                            instance_list)))
+      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
+      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
+                    for uuid in set(map(operator.attrgetter("group"),
+                                        nodes.values())))
+    else:
+      nodes = None
+      groups = None
+
      return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                     disk_usage, offline_nodes, bad_nodes,
-                                   live_data, wrongnode_inst, consinfo)
+                                   live_data, wrongnode_inst, consinfo,
+                                   nodes, groups)
  
  
  class LUQuery(NoHooksLU):
    """Query for resources/items of a certain kind.
  
    """
-  # pylint: disable-msg=W0142
+  # pylint: disable=W0142
    REQ_BGL = False
  
    def CheckArguments(self):
      qcls = _GetQueryImplementation(self.op.what)
  
-    self.impl = qcls(self.op.filter, self.op.fields, False)
+    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
  
    def ExpandNames(self):
      self.impl.ExpandNames(self)
@@ -4490,7 +5145,7 @@ class LUQueryFields(NoHooksLU):
    """Query for resources/items of a certain kind.
  
    """
-  # pylint: disable-msg=W0142
+  # pylint: disable=W0142
    REQ_BGL = False
  
    def CheckArguments(self):
@@ -4630,9 +5285,7 @@ class LUNodeAdd(LogicalUnit):
  
      self.changed_primary_ip = False
  
-    for existing_node_name in node_list:
-      existing_node = cfg.GetNodeInfo(existing_node_name)
-
+    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
        if self.op.readd and node == existing_node_name:
          if existing_node.secondary_ip != secondary_ip:
            raise errors.OpPrereqError("Readded node doesn't have the same IP"
@@ -4731,6 +5384,9 @@ class LUNodeAdd(LogicalUnit):
      new_node = self.new_node
      node = new_node.name
  
+    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
+      "Not owning BGL"
+
      # We adding a new node so we assume it's powered
      new_node.powered = True
  
@@ -4739,7 +5395,7 @@ class LUNodeAdd(LogicalUnit):
      # later in the procedure; this also means that if the re-add
      # fails, we are left with a non-offlined, broken node
      if self.op.readd:
-      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
+      new_node.drained = new_node.offline = False # pylint: disable=W0201
        self.LogInfo("Readding a node, the offline/drained flags were reset")
        # if we demote the node, we do cleanup later in the procedure
        new_node.master_candidate = self.master_candidate
@@ -4785,7 +5441,7 @@ class LUNodeAdd(LogicalUnit):
  
      node_verify_list = [self.cfg.GetMasterNode()]
      node_verify_param = {
-      constants.NV_NODELIST: [node],
+      constants.NV_NODELIST: ([node], {}),
        # TODO: do a node-net-test as well?
      }
  
@@ -4845,7 +5501,8 @@ class LUNodeSetParams(LogicalUnit):
      self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
      all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                  self.op.master_capable, self.op.vm_capable,
-                self.op.secondary_ip, self.op.ndparams]
+                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
+                self.op.disk_state]
      if all_mods.count(None) == len(all_mods):
        raise errors.OpPrereqError("Please pass at least one modification",
                                   errors.ECODE_INVAL)
@@ -4869,35 +5526,32 @@ class LUNodeSetParams(LogicalUnit):
      self.lock_all = self.op.auto_promote and self.might_demote
      self.lock_instances = self.op.secondary_ip is not None
  
+  def _InstanceFilter(self, instance):
+    """Tell whether an instance counts as affected for this operation.
+
+    @param instance: instance object to examine
+    @return: True if the instance's disk template is listed in
+        C{constants.DTS_INT_MIRROR} and C{self.op.node_name} is one of the
+        instance's nodes, False otherwise
+
+    """
+    if instance.disk_template not in constants.DTS_INT_MIRROR:
+      return False
+    return self.op.node_name in instance.all_nodes
+
    def ExpandNames(self):
      if self.lock_all:
        self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
      else:
        self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
  
-    if self.lock_instances:
-      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
+    # Since modifying a node can have severe effects on currently running
+    # operations the resource lock is at least acquired in shared mode
+    self.needed_locks[locking.LEVEL_NODE_RES] = \
+      self.needed_locks[locking.LEVEL_NODE]
  
-  def DeclareLocks(self, level):
-    # If we have locked all instances, before waiting to lock nodes, release
-    # all the ones living on nodes unrelated to the current operation.
-    if level == locking.LEVEL_NODE and self.lock_instances:
-      self.affected_instances = []
-      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
-        instances_keep = []
-
-        # Build list of instances to release
-        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
-          instance = self.context.cfg.GetInstanceInfo(instance_name)
-          if (instance.disk_template in constants.DTS_INT_MIRROR and
-              self.op.node_name in instance.all_nodes):
-            instances_keep.append(instance_name)
-            self.affected_instances.append(instance)
-
-        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
-
-        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
-                set(instances_keep))
+    # Get node resource and instance locks in shared mode; they are not used
+    # for anything but read-only access
+    self.share_locks[locking.LEVEL_NODE_RES] = 1
+    self.share_locks[locking.LEVEL_INSTANCE] = 1
+
+    if self.lock_instances:
+      self.needed_locks[locking.LEVEL_INSTANCE] = \
+        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -4929,6 +5583,25 @@ class LUNodeSetParams(LogicalUnit):
      """
      node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
  
+    if self.lock_instances:
+      affected_instances = \
+        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
+
+      # Verify instance locks
+      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
+      wanted_instances = frozenset(affected_instances.keys())
+      if wanted_instances - owned_instances:
+        raise errors.OpPrereqError("Instances affected by changing node %s's"
+                                   " secondary IP address have changed since"
+                                   " locks were acquired, wanted '%s', have"
+                                   " '%s'; retry the operation" %
+                                   (self.op.node_name,
+                                    utils.CommaJoin(wanted_instances),
+                                    utils.CommaJoin(owned_instances)),
+                                   errors.ECODE_STATE)
+    else:
+      affected_instances = None
+
      if (self.op.master_candidate is not None or
          self.op.drained is not None or
          self.op.offline is not None):
@@ -5018,7 +5691,9 @@ class LUNodeSetParams(LogicalUnit):
  
      if old_role == self._ROLE_OFFLINE and new_role != old_role:
        # Trying to transition out of offline status
-      result = self.rpc.call_version([node.name])[node.name]
+      # TODO: Use standard RPC runner, but make sure it works when the node is
+      # still marked offline
+      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
        if result.fail_msg:
          raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                     " to report its version: %s" %
@@ -5037,16 +5712,21 @@ class LUNodeSetParams(LogicalUnit):
          raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                     " homed cluster", errors.ECODE_INVAL)
  
+      assert not (frozenset(affected_instances) -
+                  self.owned_locks(locking.LEVEL_INSTANCE))
+
        if node.offline:
-        if self.affected_instances:
-          raise errors.OpPrereqError("Cannot change secondary ip: offline"
-                                     " node has instances (%s) configured"
-                                     " to use it" % self.affected_instances)
+        if affected_instances:
+          raise errors.OpPrereqError("Cannot change secondary IP address:"
+                                     " offline node has instances (%s)"
+                                     " configured to use it" %
+                                     utils.CommaJoin(affected_instances.keys()))
        else:
          # On online nodes, check that no instances are running, and that
          # the node has the new ip and we can reach it.
-        for instance in self.affected_instances:
-          _CheckInstanceDown(self, instance, "cannot change secondary ip")
+        for instance in affected_instances.values():
+          _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                              msg="cannot change secondary ip")
  
          _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
          if master.name != node.name:
@@ -5063,6 +5743,15 @@ class LUNodeSetParams(LogicalUnit):
        utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
        self.new_ndparams = new_ndparams
  
+    if self.op.hv_state:
+      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
+                                                 self.node.hv_state_static)
+
+    if self.op.disk_state:
+      self.new_disk_state = \
+        _MergeAndVerifyDiskState(self.op.disk_state,
+                                 self.node.disk_state_static)
+
    def Exec(self, feedback_fn):
      """Modifies a node.
  
@@ -5079,6 +5768,12 @@ class LUNodeSetParams(LogicalUnit):
      if self.op.powered is not None:
        node.powered = self.op.powered
  
+    if self.op.hv_state:
+      node.hv_state_static = self.new_hv_state
+
+    if self.op.disk_state:
+      node.disk_state_static = self.new_disk_state
+
      for attr in ["master_capable", "vm_capable"]:
        val = getattr(self.op, attr)
        if val is not None:
@@ -5186,17 +5881,20 @@ class LUClusterQuery(NoHooksLU):
        "architecture": (platform.architecture()[0], platform.machine()),
        "name": cluster.cluster_name,
        "master": cluster.master_node,
-      "default_hypervisor": cluster.enabled_hypervisors[0],
+      "default_hypervisor": cluster.primary_hypervisor,
        "enabled_hypervisors": cluster.enabled_hypervisors,
        "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                          for hypervisor_name in cluster.enabled_hypervisors]),
        "os_hvp": os_hvp,
        "beparams": cluster.beparams,
        "osparams": cluster.osparams,
+      "ipolicy": cluster.ipolicy,
        "nicparams": cluster.nicparams,
        "ndparams": cluster.ndparams,
        "candidate_pool_size": cluster.candidate_pool_size,
        "master_netdev": cluster.master_netdev,
+      "master_netmask": cluster.master_netmask,
+      "use_external_mip_script": cluster.use_external_mip_script,
        "volume_group_name": cluster.volume_group_name,
        "drbd_usermode_helper": cluster.drbd_usermode_helper,
        "file_storage_dir": cluster.file_storage_dir,
@@ -5442,7 +6140,7 @@ def _SafeShutdownInstanceDisks(lu, instance, disks=None):
    _ShutdownInstanceDisks.
  
    """
-  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
+  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
    _ShutdownInstanceDisks(lu, instance, disks=disks)
  
  
@@ -5512,10 +6210,12 @@ def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
        we cannot check the node
  
    """
-  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
+  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
    nodeinfo[node].Raise("Can't get data from node %s" % node,
                         prereq=True, ecode=errors.ECODE_ENVIRON)
-  free_mem = nodeinfo[node].payload.get('memory_free', None)
+  (_, _, (hv_info, )) = nodeinfo[node].payload
+
+  free_mem = hv_info.get("memory_free", None)
    if not isinstance(free_mem, int):
      raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                                 " was '%s'" % (node, free_mem),
@@ -5570,12 +6270,13 @@ def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
        or we cannot check the node
  
    """
-  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
+  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
    for node in nodenames:
      info = nodeinfo[node]
      info.Raise("Cannot get current information from node %s" % node,
                 prereq=True, ecode=errors.ECODE_ENVIRON)
-    vg_free = info.payload.get("vg_free", None)
+    (_, (vg_info, ), _) = info.payload
+    vg_free = vg_info.get("vg_free", None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on node"
                                   " %s for vg %s, result was '%s'" %
@@ -5587,6 +6288,41 @@ def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
                                   errors.ECODE_NORES)
  
  
+def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
+  """Ensure every given node reports enough physical CPUs.
+
+  Node information is requested for all nodes in one RPC call. For each
+  node the "cpu_total" value of the hypervisor information is compared
+  with the requested minimum; a failed query, a non-integer value or a
+  value that is too low all end in an OpPrereqError.
+
+  @type lu: C{LogicalUnit}
+  @param lu: a logical unit whose C{rpc} runner is used for the query
+  @type nodenames: C{list}
+  @param nodenames: the list of node names to check
+  @type requested: C{int}
+  @param requested: the minimum acceptable number of physical CPUs
+  @param hypervisor_name: the hypervisor name passed to the node info call
+  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
+      or we cannot check the node
+
+  """
+  results = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
+  for name in nodenames:
+    node_result = results[name]
+    node_result.Raise("Cannot get current information from node %s" % name,
+                      prereq=True, ecode=errors.ECODE_ENVIRON)
+    # The payload has three parts; only the single hypervisor entry is used
+    (_, _, (hv_data, )) = node_result.payload
+    cpu_total = hv_data.get("cpu_total", None)
+    if not isinstance(cpu_total, int):
+      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
+                                 " on node %s, result was '%s'" %
+                                 (name, cpu_total), errors.ECODE_ENVIRON)
+    if requested > cpu_total:
+      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
+                                 "required" % (name, cpu_total, requested),
+                                 errors.ECODE_NORES)
+
+
  class LUInstanceStartup(LogicalUnit):
    """Starts an instance.
  
@@ -5599,6 +6335,7 @@ class LUInstanceStartup(LogicalUnit):
      # extra beparams
      if self.op.beparams:
        # fill the beparams dict
+      objects.UpgradeBeParams(self.op.beparams)
        utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
  
    def ExpandNames(self):
@@ -5646,6 +6383,8 @@ class LUInstanceStartup(LogicalUnit):
        hv_type.CheckParameterSyntax(filled_hvp)
        _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
  
+    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
+
      self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
  
      if self.primary_offline and self.op.ignore_offline_nodes:
@@ -5669,7 +6408,7 @@ class LUInstanceStartup(LogicalUnit):
        if not remote_info.payload: # not running already
          _CheckNodeFreeMemory(self, instance.primary_node,
                               "starting instance %s" % instance.name,
-                             bep[constants.BE_MEMORY], instance.hypervisor)
+                             bep[constants.BE_MAXMEM], instance.hypervisor)
  
    def Exec(self, feedback_fn):
      """Start the instance.
@@ -5689,8 +6428,11 @@ class LUInstanceStartup(LogicalUnit):
  
        _StartInstanceDisks(self, instance, force)
  
-      result = self.rpc.call_instance_start(node_current, instance,
-                                            self.op.hvparams, self.op.beparams)
+      result = \
+        self.rpc.call_instance_start(node_current,
+                                     (instance, self.op.hvparams,
+                                      self.op.beparams),
+                                     self.op.startup_paused)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
@@ -5740,7 +6482,7 @@ class LUInstanceReboot(LogicalUnit):
      self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
      assert self.instance is not None, \
        "Cannot retrieve locked instance %s" % self.op.instance_name
-
+    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
      _CheckNodeOnline(self, instance.primary_node)
  
      # check bridges existence
@@ -5780,7 +6522,8 @@ class LUInstanceReboot(LogicalUnit):
          self.LogInfo("Instance %s was already stopped, starting now",
                       instance.name)
        _StartInstanceDisks(self, instance, ignore_secondaries)
-      result = self.rpc.call_instance_start(node_current, instance, None, None)
+      result = self.rpc.call_instance_start(node_current,
+                                            (instance, None, None), False)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
@@ -5828,6 +6571,8 @@ class LUInstanceShutdown(LogicalUnit):
      assert self.instance is not None, \
        "Cannot retrieve locked instance %s" % self.op.instance_name
  
+    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
+
      self.primary_offline = \
        self.cfg.GetNodeInfo(self.instance.primary_node).offline
  
@@ -5904,7 +6649,7 @@ class LUInstanceReinstall(LogicalUnit):
        raise errors.OpPrereqError("Instance '%s' has no disks" %
                                   self.op.instance_name,
                                   errors.ECODE_INVAL)
-    _CheckInstanceDown(self, instance, "cannot reinstall")
+    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
  
      if self.op.os_type is not None:
        # OS verification
@@ -5941,9 +6686,9 @@ class LUInstanceReinstall(LogicalUnit):
      try:
        feedback_fn("Running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
-      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
-                                             self.op.debug_level,
-                                             osparams=self.os_inst)
+      result = self.rpc.call_instance_os_add(inst.primary_node,
+                                             (inst, self.os_inst), True,
+                                             self.op.debug_level)
        result.Raise("Could not install OS for instance %s on node %s" %
                     (inst.name, inst.primary_node))
      finally:
@@ -5977,6 +6722,10 @@ class LUInstanceRecreateDisks(LogicalUnit):
        # otherwise we need to lock all nodes for disk re-creation
        primary_only = bool(self.op.nodes)
        self._LockInstancesNodes(primary_only=primary_only)
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -6023,10 +6772,12 @@ class LUInstanceRecreateDisks(LogicalUnit):
                                   self.op.instance_name, errors.ECODE_INVAL)
      # if we replace nodes *and* the old primary is offline, we don't
      # check
-    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
+    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
+    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
      old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
      if not (self.op.nodes and old_pnode.offline):
-      _CheckInstanceDown(self, instance, "cannot recreate disks")
+      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
+                          msg="cannot recreate disks")
  
      if not self.op.disks:
        self.op.disks = range(len(instance.disks))
@@ -6045,31 +6796,47 @@ class LUInstanceRecreateDisks(LogicalUnit):
      """Recreate the disks.
  
      """
-    # change primary node, if needed
-    if self.op.nodes:
-      self.instance.primary_node = self.op.nodes[0]
-      self.LogWarning("Changing the instance's nodes, you will have to"
-                      " remove any disks left on the older nodes manually")
+    instance = self.instance
+
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
  
      to_skip = []
-    for idx, disk in enumerate(self.instance.disks):
+    mods = [] # keeps track of needed logical_id changes
+
+    for idx, disk in enumerate(instance.disks):
        if idx not in self.op.disks: # disk idx has not been passed in
          to_skip.append(idx)
          continue
        # update secondaries for disks, if needed
        if self.op.nodes:
          if disk.dev_type == constants.LD_DRBD8:
-          # need to update the nodes
+          # need to update the nodes and minors
            assert len(self.op.nodes) == 2
-          logical_id = list(disk.logical_id)
-          logical_id[0] = self.op.nodes[0]
-          logical_id[1] = self.op.nodes[1]
-          disk.logical_id = tuple(logical_id)
+          assert len(disk.logical_id) == 6 # otherwise disk internals
+                                           # have changed
+          (_, _, old_port, _, _, old_secret) = disk.logical_id
+          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
+          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
+                    new_minors[0], new_minors[1], old_secret)
+          assert len(disk.logical_id) == len(new_id)
+          mods.append((idx, new_id))
+
+    # now that we have passed all asserts above, we can apply the mods
+    # in a single run (to avoid partial changes)
+    for idx, new_id in mods:
+      instance.disks[idx].logical_id = new_id
+
+    # change primary node, if needed
+    if self.op.nodes:
+      instance.primary_node = self.op.nodes[0]
+      self.LogWarning("Changing the instance's nodes, you will have to"
+                      " remove any disks left on the older nodes manually")
  
      if self.op.nodes:
-      self.cfg.Update(self.instance, feedback_fn)
+      self.cfg.Update(instance, feedback_fn)
  
-    _CreateDisks(self, self.instance, to_skip=to_skip)
+    _CreateDisks(self, instance, to_skip=to_skip)
  
  
  class LUInstanceRename(LogicalUnit):
@@ -6116,13 +6883,14 @@ class LUInstanceRename(LogicalUnit):
      instance = self.cfg.GetInstanceInfo(self.op.instance_name)
      assert instance is not None
      _CheckNodeOnline(self, instance.primary_node)
-    _CheckInstanceDown(self, instance, "cannot rename")
+    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
+                        msg="cannot rename")
      self.instance = instance
  
      new_name = self.op.new_name
      if self.op.name_check:
        hostname = netutils.GetHostname(name=new_name)
-      if hostname != new_name:
+      if hostname.name != new_name:
          self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                       hostname.name)
        if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
@@ -6150,7 +6918,7 @@ class LUInstanceRename(LogicalUnit):
      old_name = inst.name
  
      rename_file_storage = False
-    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
+    if (inst.disk_template in constants.DTS_FILEBASED and
          self.op.new_name != inst.name):
        old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
        rename_file_storage = True
@@ -6202,11 +6970,16 @@ class LUInstanceRemove(LogicalUnit):
    def ExpandNames(self):
      self._ExpandAndLockInstance()
      self.needed_locks[locking.LEVEL_NODE] = []
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  
    def DeclareLocks(self, level):
      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -6255,6 +7028,12 @@ class LUInstanceRemove(LogicalUnit):
                                   " node %s: %s" %
                                   (instance.name, instance.primary_node, msg))
  
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+    assert not (set(instance.all_nodes) -
+                self.owned_locks(locking.LEVEL_NODE)), \
+      "Not owning correct locks"
+
      _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
  
  
@@ -6284,7 +7063,7 @@ class LUInstanceQuery(NoHooksLU):
    """Logical unit for querying instances.
  
    """
-  # pylint: disable-msg=W0142
+  # pylint: disable=W0142
    REQ_BGL = False
  
    def CheckArguments(self):
@@ -6468,11 +7247,16 @@ class LUInstanceMove(LogicalUnit):
      target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.op.target_node = target_node
      self.needed_locks[locking.LEVEL_NODE] = [target_node]
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
  
    def DeclareLocks(self, level):
      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes(primary_only=True)
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -6530,10 +7314,10 @@ class LUInstanceMove(LogicalUnit):
      _CheckNodeNotDrained(self, target_node)
      _CheckNodeVmCapable(self, target_node)
  
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
        # check memory requirements on the secondary node
        _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
-                           instance.name, bep[constants.BE_MEMORY],
+                           instance.name, bep[constants.BE_MAXMEM],
                             instance.hypervisor)
      else:
        self.LogInfo("Not checking memory on the secondary node as"
@@ -6557,6 +7341,9 @@ class LUInstanceMove(LogicalUnit):
      self.LogInfo("Shutting down instance %s on source node %s",
                   instance.name, source_node)
  
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+
      result = self.rpc.call_instance_shutdown(source_node, instance,
                                               self.op.shutdown_timeout)
      msg = result.fail_msg
@@ -6621,7 +7408,7 @@ class LUInstanceMove(LogicalUnit):
      _RemoveDisks(self, instance, target_node=source_node)
  
      # Only start the instance if it's marked as up
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
        self.LogInfo("Starting instance %s on node %s",
                     instance.name, target_node)
  
@@ -6631,7 +7418,8 @@ class LUInstanceMove(LogicalUnit):
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Can't activate the instance's disks")
  
-      result = self.rpc.call_instance_start(target_node, instance, None, None)
+      result = self.rpc.call_instance_start(target_node,
+                                            (instance, None, None), False)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
@@ -6653,7 +7441,7 @@ class LUNodeMigrate(LogicalUnit):
    def ExpandNames(self):
      self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
  
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+    self.share_locks = _ShareAll()
      self.needed_locks = {
        locking.LEVEL_NODE: [self.op.node_name],
        }
@@ -6694,7 +7482,7 @@ class LUNodeMigrate(LogicalUnit):
      # running the iallocator and the actual migration, a good consistency model
      # will have to be found.
  
-    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
+    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
              frozenset([self.op.node_name]))
  
      return ResultWithJobs(jobs)
@@ -6724,6 +7512,11 @@ class TLMigrateInstance(Tasklet):
    @ivar shutdown_timeout: In case of failover timeout of the shutdown
  
    """
+
+  # Constants
+  _MIGRATION_POLL_INTERVAL = 1      # seconds
+  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
+
    def __init__(self, lu, instance_name, cleanup=False,
                 failover=False, fallback=False,
                 ignore_consistency=False,
@@ -6753,10 +7546,11 @@ class TLMigrateInstance(Tasklet):
      assert instance is not None
      self.instance = instance
  
-    if (not self.cleanup and not instance.admin_up and not self.failover and
-        self.fallback):
-      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
-                      " to failover")
+    if (not self.cleanup and
+        not instance.admin_state == constants.ADMINST_UP and
+        not self.failover and self.fallback):
+      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
+                      " switching to failover")
        self.failover = True
  
      if instance.disk_template not in constants.DTS_MIRRORED:
@@ -6815,14 +7609,26 @@ class TLMigrateInstance(Tasklet):
      i_be = self.cfg.GetClusterInfo().FillBE(instance)
  
      # check memory requirements on the secondary node
-    if not self.failover or instance.admin_up:
+    if not self.failover or instance.admin_state == constants.ADMINST_UP:
        _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
-                           instance.name, i_be[constants.BE_MEMORY],
+                           instance.name, i_be[constants.BE_MAXMEM],
                             instance.hypervisor)
      else:
        self.lu.LogInfo("Not checking memory on the secondary node as"
                        " instance will not be started")
  
+    # check if failover must be forced instead of migration
+    if (not self.cleanup and not self.failover and
+        i_be[constants.BE_ALWAYS_FAILOVER]):
+      if self.fallback:
+        self.lu.LogInfo("Instance configured to always failover; fallback"
+                        " to failover")
+        self.failover = True
+      else:
+        raise errors.OpPrereqError("This instance has been configured to"
+                                   " always failover, please allow failover",
+                                   errors.ECODE_STATE)
+
      # check bridge existance
      _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
  
@@ -7047,12 +7853,13 @@ class TLMigrateInstance(Tasklet):
      """
      instance = self.instance
      target_node = self.target_node
+    source_node = self.source_node
      migration_info = self.migration_info
  
-    abort_result = self.rpc.call_finalize_migration(target_node,
-                                                    instance,
-                                                    migration_info,
-                                                    False)
+    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
+                                                                 instance,
+                                                                 migration_info,
+                                                                 False)
      abort_msg = abort_result.fail_msg
      if abort_msg:
        logging.error("Aborting migration failed on target node %s: %s",
@@ -7060,6 +7867,13 @@ class TLMigrateInstance(Tasklet):
        # Don't raise an exception here, as we stil have to try to revert the
        # disk status, even if this step failed.
  
+    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
+        instance, False, self.live)
+    abort_msg = abort_result.fail_msg
+    if abort_msg:
+      logging.error("Aborting migration failed on source node %s: %s",
+                    source_node, abort_msg)
+
    def _ExecMigration(self):
      """Migrate an instance.
  
@@ -7076,6 +7890,24 @@ class TLMigrateInstance(Tasklet):
      target_node = self.target_node
      source_node = self.source_node
  
+    # Check for hypervisor version mismatch and warn the user.
+    nodeinfo = self.rpc.call_node_info([source_node, target_node],
+                                       None, [self.instance.hypervisor])
+    for ninfo in nodeinfo.values():
+      ninfo.Raise("Unable to retrieve node information from node '%s'" %
+                  ninfo.node)
+    (_, _, (src_info, )) = nodeinfo[source_node].payload
+    (_, _, (dst_info, )) = nodeinfo[target_node].payload
+
+    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
+        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
+      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
+      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
+      if src_version != dst_version:
+        self.feedback_fn("* warning: hypervisor version mismatch between"
+                         " source (%s) and target (%s) node" %
+                         (src_version, dst_version))
+
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
@@ -7131,18 +7963,59 @@ class TLMigrateInstance(Tasklet):
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))
  
+    self.feedback_fn("* starting memory transfer")
+    last_feedback = time.time()
+    while True:
+      result = self.rpc.call_instance_get_migration_status(source_node,
+                                                           instance)
+      msg = result.fail_msg
+      ms = result.payload   # MigrationStatus instance
+      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
+        logging.error("Instance migration failed, trying to revert"
+                      " disk status: %s", msg)
+        self.feedback_fn("Migration failed, aborting")
+        self._AbortMigration()
+        self._RevertDiskStatus()
+        raise errors.OpExecError("Could not migrate instance %s: %s" %
+                                 (instance.name, msg))
+
+      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
+        self.feedback_fn("* memory transfer complete")
+        break
+
+      if (utils.TimeoutExpired(last_feedback,
+                               self._MIGRATION_FEEDBACK_INTERVAL) and
+          ms.transferred_ram is not None):
+        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
+        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
+        last_feedback = time.time()
+
+      time.sleep(self._MIGRATION_POLL_INTERVAL)
+
+    result = self.rpc.call_instance_finalize_migration_src(source_node,
+                                                           instance,
+                                                           True,
+                                                           self.live)
+    msg = result.fail_msg
+    if msg:
+      logging.error("Instance migration succeeded, but finalization failed"
+                    " on the source node: %s", msg)
+      raise errors.OpExecError("Could not finalize instance migration: %s" %
+                               msg)
+
      instance.primary_node = target_node
+
      # distribute new instance config to the other nodes
      self.cfg.Update(instance, self.feedback_fn)
  
-    result = self.rpc.call_finalize_migration(target_node,
-                                              instance,
-                                              migration_info,
-                                              True)
+    result = self.rpc.call_instance_finalize_migration_dst(target_node,
+                                                           instance,
+                                                           migration_info,
+                                                           True)
      msg = result.fail_msg
      if msg:
-      logging.error("Instance migration succeeded, but finalization failed:"
-                    " %s", msg)
+      logging.error("Instance migration succeeded, but finalization failed"
+                    " on the target node: %s", msg)
        raise errors.OpExecError("Could not finalize instance migration: %s" %
                                 msg)
  
@@ -7168,12 +8041,16 @@ class TLMigrateInstance(Tasklet):
      source_node = instance.primary_node
      target_node = self.target_node
  
-    if instance.admin_up:
+    if instance.admin_state == constants.ADMINST_UP:
        self.feedback_fn("* checking disk consistency between source and target")
        for dev in instance.disks:
          # for drbd, these are drbd over lvm
-        if not _CheckDiskConsistency(self, dev, target_node, False):
-          if not self.ignore_consistency:
+        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
+          if primary_node.offline:
+            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
+                             " target node %s" %
+                             (primary_node.name, dev.iv_name, target_node))
+          elif not self.ignore_consistency:
              raise errors.OpExecError("Disk %s is degraded on target node,"
                                       " aborting failover" % dev.iv_name)
      else:
@@ -7199,30 +8076,33 @@ class TLMigrateInstance(Tasklet):
                                   (instance.name, source_node, msg))
  
      self.feedback_fn("* deactivating the instance's disks on source node")
-    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
-      raise errors.OpExecError("Can't shut down the instance's disks.")
+    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
+      raise errors.OpExecError("Can't shut down the instance's disks")
  
      instance.primary_node = target_node
      # distribute new instance config to the other nodes
      self.cfg.Update(instance, self.feedback_fn)
  
      # Only start the instance if it's marked as up
-    if instance.admin_up:
-      self.feedback_fn("* activating the instance's disks on target node")
+    if instance.admin_state == constants.ADMINST_UP:
+      self.feedback_fn("* activating the instance's disks on target node %s" %
+                       target_node)
        logging.info("Starting instance %s on node %s",
                     instance.name, target_node)
  
-      disks_ok, _ = _AssembleInstanceDisks(self, instance,
+      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                             ignore_secondaries=True)
        if not disks_ok:
-        _ShutdownInstanceDisks(self, instance)
+        _ShutdownInstanceDisks(self.lu, instance)
          raise errors.OpExecError("Can't activate the instance's disks")
  
-      self.feedback_fn("* starting the instance on the target node")
-      result = self.rpc.call_instance_start(target_node, instance, None, None)
+      self.feedback_fn("* starting the instance on the target node %s" %
+                       target_node)
+      result = self.rpc.call_instance_start(target_node, (instance, None, None),
+                                            False)
        msg = result.fail_msg
        if msg:
-        _ShutdownInstanceDisks(self, instance)
+        _ShutdownInstanceDisks(self.lu, instance)
          raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                   (instance.name, target_node, msg))
  
@@ -7240,10 +8120,8 @@ class TLMigrateInstance(Tasklet):
        # directly, or through an iallocator.
  
      self.all_nodes = [self.source_node, self.target_node]
-    self.nodes_ip = {
-      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
-      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
-      }
+    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
+                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
  
      if self.failover:
        feedback_fn("Failover instance %s" % self.instance.name)
@@ -7342,24 +8220,104 @@ def _GenerateUniqueNames(lu, exts):
    return results
  
  
+def _ComputeLDParams(disk_template, disk_params):
+  """Computes Logical Disk parameters from Disk Template parameters.
+
+  @type disk_template: string
+  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
+  @type disk_params: dict
+  @param disk_params: disk template parameters; dict(template_name -> params)
+  @rtype: list(dict)
+  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
+    contains the LD parameters of the node. The tree is flattened pre-order.
+
+  """
+  if disk_template not in constants.DISK_TEMPLATES:
+    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
+
+  result = list()
+  dt_params = disk_params[disk_template]
+  if disk_template == constants.DT_DRBD8:
+    drbd_params = {
+      constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
+      constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
+      constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
+      constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
+      constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
+      constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
+      constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
+      constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
+      constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
+      constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
+      constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
+      constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
+      }
+
+    drbd_params = \
+      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
+                       drbd_params)
+
+    result.append(drbd_params)
+
+    # data LV
+    data_params = {
+      constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
+      }
+    data_params = \
+      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
+                       data_params)
+    result.append(data_params)
+
+    # metadata LV
+    meta_params = {
+      constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
+      }
+    meta_params = \
+      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
+                       meta_params)
+    result.append(meta_params)
+
+  elif (disk_template == constants.DT_FILE or
+        disk_template == constants.DT_SHARED_FILE):
+    result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
+
+  elif disk_template == constants.DT_PLAIN:
+    params = {
+      constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
+      }
+    params = \
+      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
+                       params)
+    result.append(params)
+
+  elif disk_template == constants.DT_BLOCK:
+    result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
+
+  return result
+
+
  def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
-                         iv_name, p_minor, s_minor):
+                         iv_name, p_minor, s_minor, drbd_params, data_params,
+                         meta_params):
    """Generate a drbd8 device complete with its children.
  
    """
    assert len(vgnames) == len(names) == 2
    port = lu.cfg.AllocatePort()
    shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
+
    dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
-                          logical_id=(vgnames[0], names[0]))
-  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
-                          logical_id=(vgnames[1], names[1]))
+                          logical_id=(vgnames[0], names[0]),
+                          params=data_params)
+  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
+                          logical_id=(vgnames[1], names[1]),
+                          params=meta_params)
    drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                            logical_id=(primary, secondary, port,
                                        p_minor, s_minor,
                                        shared_secret),
                            children=[dev_data, dev_meta],
-                          iv_name=iv_name)
+                          iv_name=iv_name, params=drbd_params)
    return drbd_dev
  
  
@@ -7367,7 +8325,7 @@ def _GenerateDiskTemplate(lu, template_name,
                            instance_name, primary_node,
                            secondary_nodes, disk_info,
                            file_storage_dir, file_driver,
-                          base_index, feedback_fn):
+                          base_index, feedback_fn, disk_params):
    """Generate the entire disk layout for a given template type.
  
    """
@@ -7376,6 +8334,7 @@ def _GenerateDiskTemplate(lu, template_name,
    vgname = lu.cfg.GetVGName()
    disk_count = len(disk_info)
    disks = []
+  ld_params = _ComputeLDParams(template_name, disk_params)
    if template_name == constants.DT_DISKLESS:
      pass
    elif template_name == constants.DT_PLAIN:
@@ -7392,9 +8351,11 @@ def _GenerateDiskTemplate(lu, template_name,
                                size=disk[constants.IDISK_SIZE],
                                logical_id=(vg, names[idx]),
                                iv_name="disk/%d" % disk_index,
-                              mode=disk[constants.IDISK_MODE])
+                              mode=disk[constants.IDISK_MODE],
+                              params=ld_params[0])
        disks.append(disk_dev)
    elif template_name == constants.DT_DRBD8:
+    drbd_params, data_params, meta_params = ld_params
      if len(secondary_nodes) != 1:
        raise errors.ProgrammerError("Wrong template configuration")
      remote_node = secondary_nodes[0]
@@ -7408,14 +8369,16 @@ def _GenerateDiskTemplate(lu, template_name,
        names.append(lv_prefix + "_meta")
      for idx, disk in enumerate(disk_info):
        disk_index = idx + base_index
+      drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
        data_vg = disk.get(constants.IDISK_VG, vgname)
-      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
+      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
        disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                        disk[constants.IDISK_SIZE],
                                        [data_vg, meta_vg],
                                        names[idx * 2:idx * 2 + 2],
                                        "disk/%d" % disk_index,
-                                      minors[idx * 2], minors[idx * 2 + 1])
+                                      minors[idx * 2], minors[idx * 2 + 1],
+                                      drbd_params, data_params, meta_params)
        disk_dev.mode = disk[constants.IDISK_MODE]
        disks.append(disk_dev)
    elif template_name == constants.DT_FILE:
@@ -7432,7 +8395,8 @@ def _GenerateDiskTemplate(lu, template_name,
                                logical_id=(file_driver,
                                            "%s/disk%d" % (file_storage_dir,
                                                           disk_index)),
-                              mode=disk[constants.IDISK_MODE])
+                              mode=disk[constants.IDISK_MODE],
+                              params=ld_params[0])
        disks.append(disk_dev)
    elif template_name == constants.DT_SHARED_FILE:
      if len(secondary_nodes) != 0:
@@ -7448,7 +8412,8 @@ def _GenerateDiskTemplate(lu, template_name,
                                logical_id=(file_driver,
                                            "%s/disk%d" % (file_storage_dir,
                                                           disk_index)),
-                              mode=disk[constants.IDISK_MODE])
+                              mode=disk[constants.IDISK_MODE],
+                              params=ld_params[0])
        disks.append(disk_dev)
    elif template_name == constants.DT_BLOCK:
      if len(secondary_nodes) != 0:
@@ -7461,7 +8426,8 @@ def _GenerateDiskTemplate(lu, template_name,
                                logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                            disk[constants.IDISK_ADOPT]),
                                iv_name="disk/%d" % disk_index,
-                              mode=disk[constants.IDISK_MODE])
+                              mode=disk[constants.IDISK_MODE],
+                              params=ld_params[0])
        disks.append(disk_dev)
  
    else:
@@ -7583,7 +8549,7 @@ def _CreateDisks(lu, instance, to_skip=None, target_node=None):
      pnode = target_node
      all_nodes = [pnode]
  
-  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
+  if instance.disk_template in constants.DTS_FILEBASED:
      file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
  
@@ -7637,6 +8603,11 @@ def _RemoveDisks(lu, instance, target_node=None):
                        " continuing anyway: %s", device.iv_name, node, msg)
          all_result = False
  
+    # if this is a DRBD disk, return its port to the pool
+    if device.dev_type in constants.LDS_DRBD:
+      tcp_port = device.logical_id[2]
+      lu.cfg.AddTcpUdpPort(tcp_port)
+
    if instance.disk_template == constants.DT_FILE:
      file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      if target_node:
@@ -7672,14 +8643,14 @@ def _ComputeDiskSizePerVG(disk_template, disks):
      constants.DT_DISKLESS: {},
      constants.DT_PLAIN: _compute(disks, 0),
      # 128 MB are added for drbd metadata for each disk
-    constants.DT_DRBD8: _compute(disks, 128),
+    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
      constants.DT_FILE: {},
      constants.DT_SHARED_FILE: {},
    }
  
    if disk_template not in req_size_dict:
      raise errors.ProgrammerError("Disk template '%s' size requirement"
-                                 " is unknown" %  disk_template)
+                                 " is unknown" % disk_template)
  
    return req_size_dict[disk_template]
  
@@ -7693,7 +8664,8 @@ def _ComputeDiskSize(disk_template, disks):
      constants.DT_DISKLESS: None,
      constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
      # 128 MB are added for drbd metadata for each disk
-    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
+    constants.DT_DRBD8:
+      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
      constants.DT_FILE: None,
      constants.DT_SHARED_FILE: 0,
      constants.DT_BLOCK: 0,
@@ -7701,7 +8673,7 @@ def _ComputeDiskSize(disk_template, disks):
  
    if disk_template not in req_size_dict:
      raise errors.ProgrammerError("Disk template '%s' size requirement"
-                                 " is unknown" %  disk_template)
+                                 " is unknown" % disk_template)
  
    return req_size_dict[disk_template]
  
@@ -7739,9 +8711,11 @@ def _CheckHVParams(lu, nodenames, hvname, hvparams):
  
    """
    nodenames = _FilterVmNodes(lu, nodenames)
-  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
-                                                  hvname,
-                                                  hvparams)
+
+  cluster = lu.cfg.GetClusterInfo()
+  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
+
+  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
    for node in nodenames:
      info = hvinfo[node]
      if info.offline:
@@ -7767,7 +8741,7 @@ def _CheckOSParams(lu, required, nodenames, osname, osparams):
  
    """
    nodenames = _FilterVmNodes(lu, nodenames)
-  result = lu.rpc.call_os_validate(required, nodenames, osname,
+  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                     [constants.OS_VALIDATE_PARAMETERS],
                                     osparams)
    for node, nres in result.items():
@@ -7856,9 +8830,10 @@ class LUInstanceCreate(LogicalUnit):
        raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                   self.op.file_driver, errors.ECODE_INVAL)
  
-    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
-      raise errors.OpPrereqError("File storage directory path not absolute",
-                                 errors.ECODE_INVAL)
+    if self.op.disk_template == constants.DT_FILE:
+      opcodes.RequireFileStorage()
+    elif self.op.disk_template == constants.DT_SHARED_FILE:
+      opcodes.RequireSharedFileStorage()
  
      ### Node/iallocator related checks
      _CheckIAllocatorOrNode(self, "iallocator", "pnode")
@@ -7959,7 +8934,11 @@ class LUInstanceCreate(LogicalUnit):
      self.add_locks[locking.LEVEL_INSTANCE] = instance_name
  
      if self.op.iallocator:
+      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
+      # specifying a group on instance creation and then selecting nodes from
+      # that group
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
        nodelist = [self.op.pnode]
@@ -7967,6 +8946,9 @@ class LUInstanceCreate(LogicalUnit):
          self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
          nodelist.append(self.op.snode)
        self.needed_locks[locking.LEVEL_NODE] = nodelist
+      # Lock resources of instance's primary and secondary nodes (copy to
+      # prevent accidental modification)
+      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
  
      # in case of import lock the source node too
      if self.op.mode == constants.INSTANCE_IMPORT:
@@ -7980,8 +8962,8 @@ class LUInstanceCreate(LogicalUnit):
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
          self.op.src_node = None
          if os.path.isabs(src_path):
-          raise errors.OpPrereqError("Importing an instance from an absolute"
-                                     " path requires a source node option",
+          raise errors.OpPrereqError("Importing an instance from a path"
+                                     " requires a source node option",
                                       errors.ECODE_INVAL)
        else:
          self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
@@ -8003,7 +8985,7 @@ class LUInstanceCreate(LogicalUnit):
                       tags=self.op.tags,
                       os=self.op.os_type,
                       vcpus=self.be_full[constants.BE_VCPUS],
-                     memory=self.be_full[constants.BE_MEMORY],
+                     memory=self.be_full[constants.BE_MAXMEM],
                       disks=self.disks,
                       nics=nics,
                       hypervisor=self.op.hypervisor,
@@ -8048,7 +9030,8 @@ class LUInstanceCreate(LogicalUnit):
        secondary_nodes=self.secondaries,
        status=self.op.start,
        os_type=self.op.os_type,
-      memory=self.be_full[constants.BE_MEMORY],
+      minmem=self.be_full[constants.BE_MINMEM],
+      maxmem=self.be_full[constants.BE_MAXMEM],
        vcpus=self.be_full[constants.BE_VCPUS],
        nics=_NICListToTuple(self, self.nics),
        disk_template=self.op.disk_template,
@@ -8057,6 +9040,7 @@ class LUInstanceCreate(LogicalUnit):
        bep=self.be_full,
        hvp=self.hv_full,
        hypervisor_name=self.op.hypervisor,
+      tags=self.op.tags,
      ))
  
      return env
@@ -8083,7 +9067,7 @@ class LUInstanceCreate(LogicalUnit):
      src_path = self.op.src_path
  
      if src_node is None:
-      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
+      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
@@ -8129,38 +9113,48 @@ class LUInstanceCreate(LogicalUnit):
        if einfo.has_option(constants.INISECT_INS, "disk_template"):
          self.op.disk_template = einfo.get(constants.INISECT_INS,
                                            "disk_template")
+        if self.op.disk_template not in constants.DISK_TEMPLATES:
+          raise errors.OpPrereqError("Disk template specified in configuration"
+                                     " file is not one of the allowed values:"
+                                     " %s" % " ".join(constants.DISK_TEMPLATES))
        else:
          raise errors.OpPrereqError("No disk template specified and the export"
                                     " is missing the disk_template information",
                                     errors.ECODE_INVAL)
  
      if not self.op.disks:
-      if einfo.has_option(constants.INISECT_INS, "disk_count"):
-        disks = []
-        # TODO: import the disk iv_name too
-        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
+      disks = []
+      # TODO: import the disk iv_name too
+      for idx in range(constants.MAX_DISKS):
+        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
            disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
            disks.append({constants.IDISK_SIZE: disk_sz})
-        self.op.disks = disks
-      else:
+      self.op.disks = disks
+      if not disks and self.op.disk_template != constants.DT_DISKLESS:
          raise errors.OpPrereqError("No disk info specified and the export"
                                     " is missing the disk information",
                                     errors.ECODE_INVAL)
  
-    if (not self.op.nics and
-        einfo.has_option(constants.INISECT_INS, "nic_count")):
+    if not self.op.nics:
        nics = []
-      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
-        ndict = {}
-        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
-          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
-          ndict[name] = v
-        nics.append(ndict)
+      for idx in range(constants.MAX_NICS):
+        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
+          ndict = {}
+          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
+            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
+            ndict[name] = v
+          nics.append(ndict)
+        else:
+          break
        self.op.nics = nics
  
+    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
+      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
+
      if (self.op.hypervisor is None and
          einfo.has_option(constants.INISECT_INS, "hypervisor")):
        self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
+
      if einfo.has_section(constants.INISECT_HYP):
        # use the export parameters but do not override the ones
        # specified by the user
@@ -8173,6 +9167,12 @@ class LUInstanceCreate(LogicalUnit):
        for name, value in einfo.items(constants.INISECT_BEP):
          if name not in self.op.beparams:
            self.op.beparams[name] = value
+        # Compatibility for the old "memory" be param
+        if name == constants.BE_MEMORY:
+          if constants.BE_MAXMEM not in self.op.beparams:
+            self.op.beparams[constants.BE_MAXMEM] = value
+          if constants.BE_MINMEM not in self.op.beparams:
+            self.op.beparams[constants.BE_MINMEM] = value
      else:
        # try to read the parameters old style, from the main section
        for name in constants.BES_PARAMETERS:
@@ -8212,10 +9212,40 @@ class LUInstanceCreate(LogicalUnit):
        if name in os_defs and os_defs[name] == self.op.osparams[name]:
          del self.op.osparams[name]
  
+  def _CalculateFileStorageDir(self):
+    """Calculate final instance file storage dir.
+
+    """
+    # file storage dir calculation/check
+    self.instance_file_storage_dir = None
+    if self.op.disk_template in constants.DTS_FILEBASED:
+      # build the full file storage dir path
+      joinargs = []
+
+      if self.op.disk_template == constants.DT_SHARED_FILE:
+        get_fsd_fn = self.cfg.GetSharedFileStorageDir
+      else:
+        get_fsd_fn = self.cfg.GetFileStorageDir
+
+      cfg_storagedir = get_fsd_fn()
+      if not cfg_storagedir:
+        raise errors.OpPrereqError("Cluster file storage dir not defined")
+      joinargs.append(cfg_storagedir)
+
+      if self.op.file_storage_dir is not None:
+        joinargs.append(self.op.file_storage_dir)
+
+      joinargs.append(self.op.instance_name)
+
+      # pylint: disable=W0142
+      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
+
    def CheckPrereq(self):
      """Check prerequisites.
  
      """
+    self._CalculateFileStorageDir()
+
      if self.op.mode == constants.INSTANCE_IMPORT:
        export_info = self._ReadExportInfo()
        self._ReadExportParams(export_info)
@@ -8225,7 +9255,8 @@ class LUInstanceCreate(LogicalUnit):
        raise errors.OpPrereqError("Cluster does not support lvm-based"
                                   " instances", errors.ECODE_STATE)
  
-    if self.op.hypervisor is None:
+    if (self.op.hypervisor is None or
+        self.op.hypervisor == constants.VALUE_AUTO):
        self.op.hypervisor = self.cfg.GetHypervisorType()
  
      cluster = self.cfg.GetClusterInfo()
@@ -8251,6 +9282,11 @@ class LUInstanceCreate(LogicalUnit):
      _CheckGlobalHvParams(self.op.hvparams)
  
      # fill and remember the beparams dict
+    default_beparams = cluster.beparams[constants.PP_DEFAULT]
+    for param, value in self.op.beparams.iteritems():
+      if value == constants.VALUE_AUTO:
+        self.op.beparams[param] = default_beparams[param]
+    objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.be_full = cluster.SimpleFillBE(self.op.beparams)
  
@@ -8267,7 +9303,7 @@ class LUInstanceCreate(LogicalUnit):
      for idx, nic in enumerate(self.op.nics):
        nic_mode_req = nic.get(constants.INIC_MODE, None)
        nic_mode = nic_mode_req
-      if nic_mode is None:
+      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
          nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
  
        # in routed mode, for the first nic, the default ip is 'auto'
@@ -8311,9 +9347,11 @@ class LUInstanceCreate(LogicalUnit):
  
        #  Build nic parameters
        link = nic.get(constants.INIC_LINK, None)
+      if link == constants.VALUE_AUTO:
+        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
        nicparams = {}
        if nic_mode_req:
-        nicparams[constants.NIC_MODE] = nic_mode_req
+        nicparams[constants.NIC_MODE] = nic_mode
        if link:
          nicparams[constants.NIC_LINK] = link
  
@@ -8343,26 +9381,17 @@ class LUInstanceCreate(LogicalUnit):
          constants.IDISK_SIZE: size,
          constants.IDISK_MODE: mode,
          constants.IDISK_VG: data_vg,
-        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
          }
+      if constants.IDISK_METAVG in disk:
+        new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
        if constants.IDISK_ADOPT in disk:
          new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
        self.disks.append(new_disk)
  
      if self.op.mode == constants.INSTANCE_IMPORT:
-
-      # Check that the new instance doesn't have less disks than the export
-      instance_disks = len(self.disks)
-      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
-      if instance_disks < export_disks:
-        raise errors.OpPrereqError("Not enough disks to import."
-                                   " (instance: %d, export: %d)" %
-                                   (instance_disks, export_disks),
-                                   errors.ECODE_INVAL)
-
        disk_images = []
-      for idx in range(export_disks):
-        option = 'disk%d_dump' % idx
+      for idx in range(len(self.disks)):
+        option = "disk%d_dump" % idx
          if export_info.has_option(constants.INISECT_INS, option):
            # FIXME: are the old os-es, disk sizes, etc. useful?
            export_name = export_info.get(constants.INISECT_INS, option)
@@ -8373,17 +9402,11 @@ class LUInstanceCreate(LogicalUnit):
  
        self.src_images = disk_images
  
-      old_name = export_info.get(constants.INISECT_INS, 'name')
-      try:
-        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
-      except (TypeError, ValueError), err:
-        raise errors.OpPrereqError("Invalid export file, nic_count is not"
-                                   " an integer: %s" % str(err),
-                                   errors.ECODE_STATE)
+      old_name = export_info.get(constants.INISECT_INS, "name")
        if self.op.instance_name == old_name:
          for idx, nic in enumerate(self.nics):
-          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
-            nic_mac_ini = 'nic%d_mac' % idx
+          if nic.mac == constants.VALUE_AUTO:
+            nic_mac_ini = "nic%d_mac" % idx
              nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
  
      # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
@@ -8412,6 +9435,11 @@ class LUInstanceCreate(LogicalUnit):
      if self.op.iallocator is not None:
        self._RunAllocator()
  
+    # Release all unneeded node locks
+    _ReleaseLocks(self, locking.LEVEL_NODE,
+                  keep=filter(None, [self.op.pnode, self.op.snode,
+                                     self.op.src_node]))
+
      #### node related checks
  
      # check primary node
@@ -8440,8 +9468,19 @@ class LUInstanceCreate(LogicalUnit):
        _CheckNodeVmCapable(self, self.op.snode)
        self.secondaries.append(self.op.snode)
  
+      snode = self.cfg.GetNodeInfo(self.op.snode)
+      if pnode.group != snode.group:
+        self.LogWarning("The primary and secondary nodes are in two"
+                        " different node groups; the disk parameters"
+                        " from the first disk's node group will be"
+                        " used")
+
      nodenames = [pnode.name] + self.secondaries
  
+    # disk parameters (not customizable at instance or node level)
+    # just use the primary node parameters, ignoring the secondary.
+    self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
+
      if not self.adopt_disks:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
@@ -8526,10 +9565,11 @@ class LUInstanceCreate(LogicalUnit):
      _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
  
      # memory check on primary node
+    #TODO(dynmem): use MINMEM for checking
      if self.op.start:
        _CheckNodeFreeMemory(self, self.pnode.name,
                             "creating instance %s" % self.op.instance_name,
-                           self.be_full[constants.BE_MEMORY],
+                           self.be_full[constants.BE_MAXMEM],
                             self.op.hypervisor)
  
      self.dry_run_result = list(nodenames)
@@ -8541,60 +9581,50 @@ class LUInstanceCreate(LogicalUnit):
      instance = self.op.instance_name
      pnode_name = self.pnode.name
  
+    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
+                self.owned_locks(locking.LEVEL_NODE)), \
+      "Node locks differ from node resource locks"
+
      ht_kind = self.op.hypervisor
      if ht_kind in constants.HTS_REQ_PORT:
        network_port = self.cfg.AllocatePort()
      else:
        network_port = None
  
-    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
-      # this is needed because os.path.join does not accept None arguments
-      if self.op.file_storage_dir is None:
-        string_file_storage_dir = ""
-      else:
-        string_file_storage_dir = self.op.file_storage_dir
-
-      # build the full file storage dir path
-      if self.op.disk_template == constants.DT_SHARED_FILE:
-        get_fsd_fn = self.cfg.GetSharedFileStorageDir
-      else:
-        get_fsd_fn = self.cfg.GetFileStorageDir
-
-      file_storage_dir = utils.PathJoin(get_fsd_fn(),
-                                        string_file_storage_dir, instance)
-    else:
-      file_storage_dir = ""
-
      disks = _GenerateDiskTemplate(self,
                                    self.op.disk_template,
                                    instance, pnode_name,
                                    self.secondaries,
                                    self.disks,
-                                  file_storage_dir,
+                                  self.instance_file_storage_dir,
                                    self.op.file_driver,
                                    0,
-                                  feedback_fn)
+                                  feedback_fn,
+                                  self.diskparams)
  
      iobj = objects.Instance(name=instance, os=self.op.os_type,
                              primary_node=pnode_name,
                              nics=self.nics, disks=disks,
                              disk_template=self.op.disk_template,
-                            admin_up=False,
+                            admin_state=constants.ADMINST_DOWN,
                              network_port=network_port,
                              beparams=self.op.beparams,
                              hvparams=self.op.hvparams,
                              hypervisor=self.op.hypervisor,
                              osparams=self.op.osparams,
-                            tags=self.op.tags,
                              )
  
+    if self.op.tags:
+      for tag in self.op.tags:
+        iobj.AddTag(tag)
+
      if self.adopt_disks:
        if self.op.disk_template == constants.DT_PLAIN:
          # rename LVs to the newly-generated names; we need to construct
          # 'fake' LV disks with the old data, plus the new unique_id
          tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
          rename_to = []
-        for t_dsk, a_dsk in zip (tmp_disks, self.disks):
+        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
            rename_to.append(t_dsk.logical_id)
            t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
            self.cfg.SetDiskID(t_dsk, pnode_name)
@@ -8645,7 +9675,6 @@ class LUInstanceCreate(LogicalUnit):
        disk_abort = not _WaitForSync(self, iobj)
      elif iobj.disk_template in constants.DTS_INT_MIRROR:
        # make sure the disks are not degraded (still sync-ing is ok)
-      time.sleep(15)
        feedback_fn("* checking mirrors status")
        disk_abort = not _WaitForSync(self, iobj, oneshot=True)
      else:
@@ -8659,15 +9688,39 @@ class LUInstanceCreate(LogicalUnit):
        raise errors.OpExecError("There are some degraded disks for"
                                 " this instance")
  
+    # Release all node resource locks
+    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
+
      if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
        if self.op.mode == constants.INSTANCE_CREATE:
          if not self.op.no_install:
+          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
+                        not self.op.wait_for_sync)
+          if pause_sync:
+            feedback_fn("* pausing disk sync to install instance OS")
+            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
+                                                              iobj.disks, True)
+            for idx, success in enumerate(result.payload):
+              if not success:
+                logging.warn("pause-sync of instance %s for disk %d failed",
+                             instance, idx)
+
            feedback_fn("* running the instance OS create scripts...")
            # FIXME: pass debug option from opcode to backend
-          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
-                                                 self.op.debug_level)
-          result.Raise("Could not add os for instance %s"
-                       " on node %s" % (instance, pnode_name))
+          os_add_result = \
+            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
+                                          self.op.debug_level)
+          if pause_sync:
+            feedback_fn("* resuming disk sync")
+            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
+                                                              iobj.disks, False)
+            for idx, success in enumerate(result.payload):
+              if not success:
+                logging.warn("resume-sync of instance %s for disk %d failed",
+                             instance, idx)
+
+          os_add_result.Raise("Could not add os for instance %s"
+                              " on node %s" % (instance, pnode_name))
  
        elif self.op.mode == constants.INSTANCE_IMPORT:
          feedback_fn("* running the instance OS import scripts...")
@@ -8730,12 +9783,15 @@ class LUInstanceCreate(LogicalUnit):
          raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                       % self.op.mode)
  
+    assert not self.owned_locks(locking.LEVEL_NODE_RES)
+
      if self.op.start:
-      iobj.admin_up = True
+      iobj.admin_state = constants.ADMINST_UP
        self.cfg.Update(iobj, feedback_fn)
        logging.info("Starting instance %s on node %s", instance, pnode_name)
        feedback_fn("* starting instance...")
-      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
+      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
+                                            False)
        result.Raise("Could not start instance")
  
      return list(iobj.all_nodes)
@@ -8752,6 +9808,7 @@ class LUInstanceConsole(NoHooksLU):
    REQ_BGL = False
  
    def ExpandNames(self):
+    self.share_locks = _ShareAll()
      self._ExpandAndLockInstance()
  
    def CheckPrereq(self):
@@ -8777,10 +9834,12 @@ class LUInstanceConsole(NoHooksLU):
      node_insts.Raise("Can't get node information from %s" % node)
  
      if instance.name not in node_insts.payload:
-      if instance.admin_up:
+      if instance.admin_state == constants.ADMINST_UP:
          state = constants.INSTST_ERRORDOWN
-      else:
+      elif instance.admin_state == constants.ADMINST_DOWN:
          state = constants.INSTST_ADMINDOWN
+      else:
+        state = constants.INSTST_ADMINOFFLINE
        raise errors.OpExecError("Instance %s is not running (state %s)" %
                                 (instance.name, state))
  
@@ -8826,6 +9885,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
      self._ExpandAndLockInstance()
  
      assert locking.LEVEL_NODE not in self.needed_locks
+    assert locking.LEVEL_NODE_RES not in self.needed_locks
      assert locking.LEVEL_NODEGROUP not in self.needed_locks
  
      assert self.op.iallocator is None or self.op.remote_node is None, \
@@ -8848,6 +9908,8 @@ class LUInstanceReplaceDisks(LogicalUnit):
          # iallocator will select a new node in the same group
          self.needed_locks[locking.LEVEL_NODEGROUP] = []
  
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
+
      self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                     self.op.iallocator, self.op.remote_node,
                                     self.op.disks, False, self.op.early_release)
@@ -8861,6 +9923,8 @@ class LUInstanceReplaceDisks(LogicalUnit):
        assert not self.needed_locks[locking.LEVEL_NODEGROUP]
  
        self.share_locks[locking.LEVEL_NODEGROUP] = 1
+      # Lock all groups used by instance optimistically; this requires going
+      # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          self.cfg.GetInstanceNodeGroups(self.op.instance_name)
  
@@ -8871,10 +9935,14 @@ class LUInstanceReplaceDisks(LogicalUnit):
  
          # Lock member nodes of all locked groups
          self.needed_locks[locking.LEVEL_NODE] = [node_name
-          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
+          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
            for node_name in self.cfg.GetNodeGroup(group_uuid).members]
        else:
          self._LockInstancesNodes()
+    elif level == locking.LEVEL_NODE_RES:
+      # Reuse node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -8911,16 +9979,10 @@ class LUInstanceReplaceDisks(LogicalUnit):
      assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
              self.op.iallocator is None)
  
-    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
+    # Verify if node group locks are still correct
+    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
      if owned_groups:
-      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
-      if owned_groups != groups:
-        raise errors.OpExecError("Node groups used by instance '%s' changed"
-                                 " since lock was acquired, current list is %r,"
-                                 " used to be '%s'" %
-                                 (self.op.instance_name,
-                                  utils.CommaJoin(groups),
-                                  utils.CommaJoin(owned_groups)))
+      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
  
      return LogicalUnit.CheckPrereq(self)
  
@@ -8985,7 +10047,7 @@ class TLReplaceDisks(Tasklet):
      ial = IAllocator(lu.cfg, lu.rpc,
                       mode=constants.IALLOCATOR_MODE_RELOC,
                       name=instance_name,
-                     relocate_from=relocate_from)
+                     relocate_from=list(relocate_from))
  
      ial.Run(iallocator_name)
  
@@ -9009,6 +10071,9 @@ class TLReplaceDisks(Tasklet):
      return remote_node_name
  
    def _FindFaultyDisks(self, node_name):
+    """Wrapper for L{_FindFaultyInstanceDisks}.
+
+    """
      return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                      node_name, True)
  
@@ -9079,7 +10144,7 @@ class TLReplaceDisks(Tasklet):
      if remote_node is None:
        self.remote_node_info = None
      else:
-      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
+      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
               "Remote node '%s' is not locked" % remote_node
  
        self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
@@ -9165,6 +10230,16 @@ class TLReplaceDisks(Tasklet):
        if not self.disks:
          self.disks = range(len(self.instance.disks))
  
+    # TODO: compute disk parameters
+    primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
+    secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
+    if primary_node_info.group != secondary_node_info.group:
+      self.lu.LogInfo("The instance primary and secondary nodes are in two"
+                      " different node groups; the disk parameters of the"
+                      " primary node's group will be applied.")
+
+    self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
+
      for node in check_nodes:
        _CheckNodeOnline(self.lu, node)
  
@@ -9173,8 +10248,9 @@ class TLReplaceDisks(Tasklet):
                                                            self.target_node]
                                if node_name is not None)
  
-    # Release unneeded node locks
+    # Release unneeded node and node resource locks
      _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
  
      # Release any owned node group
      if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
@@ -9185,9 +10261,8 @@ class TLReplaceDisks(Tasklet):
        instance.FindDisk(disk_idx)
  
      # Get secondary node IP addresses
-    self.node_secondary_ip = \
-      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
-           for node_name in touched_nodes)
+    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
+                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
  
    def Exec(self, feedback_fn):
      """Execute disk replacement.
@@ -9200,13 +10275,15 @@ class TLReplaceDisks(Tasklet):
  
      if __debug__:
        # Verify owned locks before starting operation
-      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
-      assert set(owned_locks) == set(self.node_secondary_ip), \
+      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
+      assert set(owned_nodes) == set(self.node_secondary_ip), \
            ("Incorrect node locks, owning %s, expected %s" %
-           (owned_locks, self.node_secondary_ip.keys()))
+           (owned_nodes, self.node_secondary_ip.keys()))
+      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
+              self.lu.owned_locks(locking.LEVEL_NODE_RES))
  
-      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
-      assert list(owned_locks) == [self.instance_name], \
+      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
+      assert list(owned_instances) == [self.instance_name], \
            "Instance '%s' not locked" % self.instance_name
  
        assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
@@ -9219,7 +10296,7 @@ class TLReplaceDisks(Tasklet):
      feedback_fn("Replacing disk(s) %s for %s" %
                  (utils.CommaJoin(self.disks), self.instance.name))
  
-    activate_disks = (not self.instance.admin_up)
+    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
  
      # Activate the instance disks if we're replacing them on a down instance
      if activate_disks:
@@ -9239,14 +10316,16 @@ class TLReplaceDisks(Tasklet):
        if activate_disks:
          _SafeShutdownInstanceDisks(self.lu, self.instance)
  
+    assert not self.lu.owned_locks(locking.LEVEL_NODE)
+
      if __debug__:
        # Verify owned locks
-      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
+      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
        nodes = frozenset(self.node_secondary_ip)
-      assert ((self.early_release and not owned_locks) or
-              (not self.early_release and not (set(owned_locks) - nodes))), \
+      assert ((self.early_release and not owned_nodes) or
+              (not self.early_release and not (set(owned_nodes) - nodes))), \
          ("Not owning the correct locks, early_release=%s, owned=%r,"
-         " nodes=%r" % (self.early_release, owned_locks, nodes))
+         " nodes=%r" % (self.early_release, owned_nodes, nodes))
  
      return result
  
@@ -9301,6 +10380,12 @@ class TLReplaceDisks(Tasklet):
                                   (node_name, self.instance.name))
  
    def _CreateNewStorage(self, node_name):
+    """Create new storage on the primary or secondary node.
+
+    This is only used for same-node replaces, not for changing the
+    secondary node, hence we don't want to modify the existing disk.
+
+    """
      iv_names = {}
  
      for idx, dev in enumerate(self.instance.disks):
@@ -9314,15 +10399,17 @@ class TLReplaceDisks(Tasklet):
        lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
        names = _GenerateUniqueNames(self.lu, lv_names)
  
+      _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
+
        vg_data = dev.children[0].logical_id[0]
        lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
-                             logical_id=(vg_data, names[0]))
+                             logical_id=(vg_data, names[0]), params=data_p)
        vg_meta = dev.children[1].logical_id[0]
-      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
-                             logical_id=(vg_meta, names[1]))
+      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
+                             logical_id=(vg_meta, names[1]), params=meta_p)
  
        new_lvs = [lv_data, lv_meta]
-      old_lvs = dev.children
+      old_lvs = [child.Copy() for child in dev.children]
        iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
  
        # we pass force_create=True to force the LVM creation
@@ -9360,7 +10447,7 @@ class TLReplaceDisks(Tasklet):
            self.lu.LogWarning("Can't remove old LV: %s" % msg,
                               hint="remove unused LVs manually")
  
-  def _ExecDrbd8DiskOnly(self, feedback_fn):
+  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
      """Replace a disk on the primary or secondary for DRBD 8.
  
      The algorithm for replace is quite complicated:
@@ -9443,10 +10530,14 @@ class TLReplaceDisks(Tasklet):
                                               rename_new_to_old)
        result.Raise("Can't rename new LVs on node %s" % self.target_node)
  
+      # Intermediate steps of in-memory modifications
        for old, new in zip(old_lvs, new_lvs):
          new.logical_id = old.logical_id
          self.cfg.SetDiskID(new, self.target_node)
  
+      # We need to modify old_lvs so that removal later removes the
+      # right LVs, not the newly added ones; note that old_lvs is a
+      # copy here
        for disk in old_lvs:
          disk.logical_id = ren_fn(disk, temp_suffix)
          self.cfg.SetDiskID(disk, self.target_node)
@@ -9466,25 +10557,28 @@ class TLReplaceDisks(Tasklet):
                                       "volumes"))
          raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
  
-      dev.children = new_lvs
+    cstep = itertools.count(5)
  
-      self.cfg.Update(self.instance, feedback_fn)
-
-    cstep = 5
      if self.early_release:
-      self.lu.LogStep(cstep, steps_total, "Removing old storage")
-      cstep += 1
+      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
        self._RemoveOldStorage(self.target_node, iv_names)
-      # WARNING: we release both node locks here, do not do other RPCs
-      # than WaitForSync to the primary node
-      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
-                    names=[self.target_node, self.other_node])
+      # TODO: Check if releasing locks early still makes sense
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
+    else:
+      # Release all resource locks except those used by the instance
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
+                    keep=self.node_secondary_ip.keys())
+
+    # Release all node locks while waiting for sync
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
+
+    # TODO: Can the instance lock be downgraded here? Take the optional disk
+    # shutdown in the caller into consideration.
  
      # Wait for sync
      # This can fail as the old devices are degraded and _WaitForSync
      # does a combined result over all disks, so we don't check its return value
-    self.lu.LogStep(cstep, steps_total, "Sync devices")
-    cstep += 1
+    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
      _WaitForSync(self.lu, self.instance)
  
      # Check all devices manually
@@ -9492,8 +10586,7 @@ class TLReplaceDisks(Tasklet):
  
      # Step: remove old storage
      if not self.early_release:
-      self.lu.LogStep(cstep, steps_total, "Removing old storage")
-      cstep += 1
+      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
        self._RemoveOldStorage(self.target_node, iv_names)
  
    def _ExecDrbd8Secondary(self, feedback_fn):
@@ -9517,6 +10610,8 @@ class TLReplaceDisks(Tasklet):
      """
      steps_total = 6
  
+    pnode = self.instance.primary_node
+
      # Step: check device activation
      self.lu.LogStep(1, steps_total, "Check device existence")
      self._CheckDisksExistence([self.instance.primary_node])
@@ -9568,10 +10663,12 @@ class TLReplaceDisks(Tasklet):
        iv_names[idx] = (dev, dev.children, new_net_id)
        logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                      new_net_id)
+      drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
        new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                                logical_id=new_alone_id,
                                children=dev.children,
-                              size=dev.size)
+                              size=dev.size,
+                              params=drbd_params)
        try:
          _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                                _GetInstanceInfoText(self.instance), False)
@@ -9591,10 +10688,8 @@ class TLReplaceDisks(Tasklet):
                                   " soon as possible"))
  
      self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
-    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
-                                               self.node_secondary_ip,
-                                               self.instance.disks)\
-                                              [self.instance.primary_node]
+    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
+                                               self.instance.disks)[pnode]
  
      msg = result.fail_msg
      if msg:
@@ -9612,6 +10707,9 @@ class TLReplaceDisks(Tasklet):
  
      self.cfg.Update(self.instance, feedback_fn)
  
+    # Release all node locks (the configuration has been updated)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
+
      # and now perform the drbd attach
      self.lu.LogInfo("Attaching primary drbds to new secondary"
                      " (standalone => connected)")
@@ -9628,23 +10726,26 @@ class TLReplaceDisks(Tasklet):
                             to_node, msg,
                             hint=("please do a gnt-instance info to see the"
                                   " status of disks"))
-    cstep = 5
+
+    cstep = itertools.count(5)
+
      if self.early_release:
-      self.lu.LogStep(cstep, steps_total, "Removing old storage")
-      cstep += 1
+      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
        self._RemoveOldStorage(self.target_node, iv_names)
-      # WARNING: we release all node locks here, do not do other RPCs
-      # than WaitForSync to the primary node
-      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
-                    names=[self.instance.primary_node,
-                           self.target_node,
-                           self.new_node])
+      # TODO: Check if releasing locks early still makes sense
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
+    else:
+      # Release all resource locks except those used by the instance
+      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
+                    keep=self.node_secondary_ip.keys())
+
+    # TODO: Can the instance lock be downgraded here? Take the optional disk
+    # shutdown in the caller into consideration.
  
      # Wait for sync
      # This can fail as the old devices are degraded and _WaitForSync
      # does a combined result over all disks, so we don't check its return value
-    self.lu.LogStep(cstep, steps_total, "Sync devices")
-    cstep += 1
+    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
      _WaitForSync(self.lu, self.instance)
  
      # Check all devices manually
@@ -9652,7 +10753,7 @@ class TLReplaceDisks(Tasklet):
  
      # Step: remove old storage
      if not self.early_release:
-      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
        self._RemoveOldStorage(self.target_node, iv_names)
  
  
@@ -9698,7 +10799,7 @@ class LURepairNodeStorage(NoHooksLU):
      """
      # Check whether any instance on this node has faulty disks
      for inst in _GetNodeInstances(self.cfg, self.op.node_name):
-      if not inst.admin_up:
+      if inst.admin_state != constants.ADMINST_UP:
          continue
        check_nodes = set(inst.all_nodes)
        check_nodes.discard(self.op.node_name)
@@ -9718,51 +10819,260 @@ class LURepairNodeStorage(NoHooksLU):
                   (self.op.name, self.op.node_name))
  
  
-class LUNodeEvacStrategy(NoHooksLU):
-  """Computes the node evacuation strategy.
+class LUNodeEvacuate(NoHooksLU):
+  """Evacuates instances off a list of nodes.
  
    """
    REQ_BGL = False
  
+  _MODE2IALLOCATOR = {
+    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
+    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
+    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
+    }
+  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
+  assert (frozenset(_MODE2IALLOCATOR.values()) ==
+          constants.IALLOCATOR_NEVAC_MODES)
+
    def CheckArguments(self):
      _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
  
    def ExpandNames(self):
-    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
-    self.needed_locks = locks = {}
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+
+    if self.op.remote_node is not None:
+      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
+      assert self.op.remote_node
+
+      if self.op.remote_node == self.op.node_name:
+        raise errors.OpPrereqError("Can not use evacuated node as a new"
+                                   " secondary node", errors.ECODE_INVAL)
+
+      if self.op.mode != constants.NODE_EVAC_SEC:
+        raise errors.OpPrereqError("Without the use of an iallocator only"
+                                   " secondary instances can be evacuated",
+                                   errors.ECODE_INVAL)
+
+    # Declare locks
+    self.share_locks = _ShareAll()
+    self.needed_locks = {
+      locking.LEVEL_INSTANCE: [],
+      locking.LEVEL_NODEGROUP: [],
+      locking.LEVEL_NODE: [],
+      }
+
+    # Determine nodes (via group) optimistically, needs verification once locks
+    # have been acquired
+    self.lock_nodes = self._DetermineNodes()
+
+  def _DetermineNodes(self):
+    """Gets the list of nodes to operate on.
+
+    """
      if self.op.remote_node is None:
-      locks[locking.LEVEL_NODE] = locking.ALL_SET
+      # Iallocator will choose any node(s) in the same group
+      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
      else:
-      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
-      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
+      group_nodes = frozenset([self.op.remote_node])
  
-  def Exec(self, feedback_fn):
-    instances = []
-    for node in self.op.nodes:
-      instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
-    if not instances:
-      return []
+    # Determine nodes to be locked
+    return set([self.op.node_name]) | group_nodes
+
+  def _DetermineInstances(self):
+    """Builds list of instances to operate on.
+
+    """
+    assert self.op.mode in constants.NODE_EVAC_MODES
+
+    if self.op.mode == constants.NODE_EVAC_PRI:
+      # Primary instances only
+      inst_fn = _GetNodePrimaryInstances
+      assert self.op.remote_node is None, \
+        "Evacuating primary instances requires iallocator"
+    elif self.op.mode == constants.NODE_EVAC_SEC:
+      # Secondary instances only
+      inst_fn = _GetNodeSecondaryInstances
+    else:
+      # All instances
+      assert self.op.mode == constants.NODE_EVAC_ALL
+      inst_fn = _GetNodeInstances
+      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
+      # per instance
+      raise errors.OpPrereqError("Due to an issue with the iallocator"
+                                 " interface it is not possible to evacuate"
+                                 " all instances at once; specify explicitly"
+                                 " whether to evacuate primary or secondary"
+                                 " instances",
+                                 errors.ECODE_INVAL)
+
+    return inst_fn(self.cfg, self.op.node_name)
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_INSTANCE:
+      # Lock instances optimistically, needs verification once node and group
+      # locks have been acquired
+      self.needed_locks[locking.LEVEL_INSTANCE] = \
+        set(i.name for i in self._DetermineInstances())
+
+    elif level == locking.LEVEL_NODEGROUP:
+      # Lock node groups for all potential target nodes optimistically, needs
+      # verification once nodes have been acquired
+      self.needed_locks[locking.LEVEL_NODEGROUP] = \
+        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
+
+    elif level == locking.LEVEL_NODE:
+      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
+
+  def CheckPrereq(self):
+    # Verify locks
+    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
+    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
+    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
+
+    need_nodes = self._DetermineNodes()
+
+    if not owned_nodes.issuperset(need_nodes):
+      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
+                                 " locks were acquired, current nodes are"
+                                 " '%s', used to be '%s'; retry the"
+                                 " operation" %
+                                 (self.op.node_name,
+                                  utils.CommaJoin(need_nodes),
+                                  utils.CommaJoin(owned_nodes)),
+                                 errors.ECODE_STATE)
+
+    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
+    if owned_groups != wanted_groups:
+      raise errors.OpExecError("Node groups changed since locks were acquired,"
+                               " current groups are '%s', used to be '%s';"
+                               " retry the operation" %
+                               (utils.CommaJoin(wanted_groups),
+                                utils.CommaJoin(owned_groups)))
+
+    # Determine affected instances
+    self.instances = self._DetermineInstances()
+    self.instance_names = [i.name for i in self.instances]
+
+    if set(self.instance_names) != owned_instances:
+      raise errors.OpExecError("Instances on node '%s' changed since locks"
+                               " were acquired, current instances are '%s',"
+                               " used to be '%s'; retry the operation" %
+                               (self.op.node_name,
+                                utils.CommaJoin(self.instance_names),
+                                utils.CommaJoin(owned_instances)))
+
+    if self.instance_names:
+      self.LogInfo("Evacuating instances from node '%s': %s",
+                   self.op.node_name,
+                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
+    else:
+      self.LogInfo("No instances to evacuate from node '%s'",
+                   self.op.node_name)
  
      if self.op.remote_node is not None:
-      result = []
-      for i in instances:
+      for i in self.instances:
          if i.primary_node == self.op.remote_node:
            raise errors.OpPrereqError("Node %s is the primary node of"
                                       " instance %s, cannot use it as"
                                       " secondary" %
                                       (self.op.remote_node, i.name),
                                       errors.ECODE_INVAL)
-        result.append([i.name, self.op.remote_node])
-    else:
-      ial = IAllocator(self.cfg, self.rpc,
-                       mode=constants.IALLOCATOR_MODE_MEVAC,
-                       evac_nodes=self.op.nodes)
-      ial.Run(self.op.iallocator, validate=True)
+
+  def Exec(self, feedback_fn):
+    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
+
+    if not self.instance_names:
+      # No instances to evacuate
+      jobs = []
+
+    elif self.op.iallocator is not None:
+      # TODO: Implement relocation to other group
+      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
+                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
+                       instances=list(self.instance_names))
+
+      ial.Run(self.op.iallocator)
+
        if not ial.success:
-        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
-                                 errors.ECODE_NORES)
-      result = ial.result
-    return result
+        raise errors.OpPrereqError("Can't compute node evacuation using"
+                                   " iallocator '%s': %s" %
+                                   (self.op.iallocator, ial.info),
+                                   errors.ECODE_NORES)
+
+      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
+
+    elif self.op.remote_node is not None:
+      assert self.op.mode == constants.NODE_EVAC_SEC
+      jobs = [
+        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
+                                        remote_node=self.op.remote_node,
+                                        disks=[],
+                                        mode=constants.REPLACE_DISK_CHG,
+                                        early_release=self.op.early_release)]
+        for instance_name in self.instance_names
+        ]
+
+    else:
+      raise errors.ProgrammerError("No iallocator or remote node")
+
+    return ResultWithJobs(jobs)
+
+
+def _SetOpEarlyRelease(early_release, op):
+  """Sets C{early_release} flag on opcodes if available.
+
+  """
+  try:
+    op.early_release = early_release
+  except AttributeError:
+    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
+
+  return op
+
+
+def _NodeEvacDest(use_nodes, group, nodes):
+  """Returns group or nodes depending on caller's choice.
+
+  """
+  if use_nodes:
+    return utils.CommaJoin(nodes)
+  else:
+    return group
+
+
+def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
+  """Unpacks the result of change-group and node-evacuate iallocator requests.
+
+  Used for iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
+  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
+
+  @type lu: L{LogicalUnit}
+  @param lu: Logical unit instance
+  @type alloc_result: tuple/list
+  @param alloc_result: Result from iallocator
+  @type early_release: bool
+  @param early_release: Whether to release locks early if possible
+  @type use_nodes: bool
+  @param use_nodes: Whether to display node names instead of groups
+
+  """
+  (moved, failed, jobs) = alloc_result
+
+  if failed:
+    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
+                                 for (name, reason) in failed)
+    lu.LogWarning("Unable to evacuate instances %s", failreason)
+    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
+
+  if moved:
+    lu.LogInfo("Instances to be moved: %s",
+               utils.CommaJoin("%s (to %s)" %
+                               (name, _NodeEvacDest(use_nodes, group, nodes))
+                               for (name, group, nodes) in moved))
+
+  return [map(compat.partial(_SetOpEarlyRelease, early_release),
+              map(opcodes.OpCode.LoadOpCode, ops))
+          for ops in jobs]
  
  
  class LUInstanceGrowDisk(LogicalUnit):
@@ -9776,11 +11086,16 @@ class LUInstanceGrowDisk(LogicalUnit):
    def ExpandNames(self):
      self._ExpandAndLockInstance()
      self.needed_locks[locking.LEVEL_NODE] = []
-    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
+    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
  
    def DeclareLocks(self, level):
      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()
+    elif level == locking.LEVEL_NODE_RES:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -9837,10 +11152,18 @@ class LUInstanceGrowDisk(LogicalUnit):
      instance = self.instance
      disk = self.disk
  
+    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES))
+
      disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
      if not disks_ok:
        raise errors.OpExecError("Cannot activate block device to grow")
  
+    feedback_fn("Growing disk %s of instance '%s' by %s" %
+                (self.op.disk, instance.name,
+                 utils.FormatUnit(self.op.amount, "h")))
+
      # First run all grow ops in dry-run mode
      for node in instance.all_nodes:
        self.cfg.SetDiskID(disk, node)
@@ -9863,18 +11186,28 @@ class LUInstanceGrowDisk(LogicalUnit):
  
      disk.RecordGrow(self.op.amount)
      self.cfg.Update(instance, feedback_fn)
+
+    # Changes have been recorded, release node lock
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+
+    # Downgrade lock while waiting for sync
+    self.glm.downgrade(locking.LEVEL_INSTANCE)
+
      if self.op.wait_for_sync:
        disk_abort = not _WaitForSync(self, instance, disks=[disk])
        if disk_abort:
          self.proc.LogWarning("Disk sync-ing has not returned a good"
                               " status; please check the instance")
-      if not instance.admin_up:
+      if instance.admin_state != constants.ADMINST_UP:
          _SafeShutdownInstanceDisks(self, instance, disks=[disk])
-    elif not instance.admin_up:
+    elif instance.admin_state != constants.ADMINST_UP:
        self.proc.LogWarning("Not shutting down the disk even if the instance is"
                             " not supposed to be running because no wait for"
                             " sync mode was requested")
  
+    assert self.owned_locks(locking.LEVEL_NODE_RES)
+    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
+
  
  class LUInstanceQueryData(NoHooksLU):
    """Query runtime instance data.
@@ -9898,7 +11231,7 @@ class LUInstanceQueryData(NoHooksLU):
        self.wanted_names = None
  
      if self.op.use_locking:
-      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+      self.share_locks = _ShareAll()
  
        if self.wanted_names is None:
          self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
@@ -9906,7 +11239,6 @@ class LUInstanceQueryData(NoHooksLU):
          self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
  
        self.needed_locks[locking.LEVEL_NODE] = []
-      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  
    def DeclareLocks(self, level):
@@ -9921,10 +11253,10 @@ class LUInstanceQueryData(NoHooksLU):
      """
      if self.wanted_names is None:
        assert self.op.use_locking, "Locking was not used"
-      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
+      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
  
-    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
-                             for name in self.wanted_names]
+    self.wanted_instances = \
+        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
  
    def _ComputeBlockdevStatus(self, node, instance_name, dev):
      """Returns the status of a block device
@@ -9965,8 +11297,9 @@ class LUInstanceQueryData(NoHooksLU):
      dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
  
      if dev.children:
-      dev_children = [self._ComputeDiskStatus(instance, snode, child)
-                      for child in dev.children]
+      dev_children = map(compat.partial(self._ComputeDiskStatus,
+                                        instance, snode),
+                         dev.children)
      else:
        dev_children = []
  
@@ -9988,8 +11321,16 @@ class LUInstanceQueryData(NoHooksLU):
  
      cluster = self.cfg.GetClusterInfo()
  
-    for instance in self.wanted_instances:
-      if not self.op.static:
+    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
+                                          for i in self.wanted_instances)
+    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
+      if self.op.static or pnode.offline:
+        remote_state = None
+        if pnode.offline:
+          self.LogWarning("Primary node %s is marked offline, returning static"
+                          " information only for instance %s" %
+                          (pnode.name, instance.name))
+      else:
          remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                    instance.name,
                                                    instance.hypervisor)
@@ -9998,20 +11339,17 @@ class LUInstanceQueryData(NoHooksLU):
          if remote_info and "state" in remote_info:
            remote_state = "up"
          else:
-          remote_state = "down"
-      else:
-        remote_state = None
-      if instance.admin_up:
-        config_state = "up"
-      else:
-        config_state = "down"
+          if instance.admin_state == constants.ADMINST_UP:
+            remote_state = "down"
+          else:
+            remote_state = instance.admin_state
  
-      disks = [self._ComputeDiskStatus(instance, None, device)
-               for device in instance.disks]
+      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
+                  instance.disks)
  
        result[instance.name] = {
          "name": instance.name,
-        "config_state": config_state,
+        "config_state": instance.admin_state,
          "run_state": remote_state,
          "pnode": instance.primary_node,
          "snodes": instance.secondary_nodes,
@@ -10047,7 +11385,8 @@ class LUInstanceSetParams(LogicalUnit):
  
    def CheckArguments(self):
      if not (self.op.nics or self.op.disks or self.op.disk_template or
-            self.op.hvparams or self.op.beparams or self.op.os_name):
+            self.op.hvparams or self.op.beparams or self.op.os_name or
+            self.op.online_inst or self.op.offline_inst):
        raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
  
      if self.op.hvparams:
@@ -10132,13 +11471,13 @@ class LUInstanceSetParams(LogicalUnit):
              raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                         errors.ECODE_INVAL)
  
-      nic_bridge = nic_dict.get('bridge', None)
+      nic_bridge = nic_dict.get("bridge", None)
        nic_link = nic_dict.get(constants.INIC_LINK, None)
        if nic_bridge and nic_link:
          raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                     " at the same time", errors.ECODE_INVAL)
        elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
-        nic_dict['bridge'] = None
+        nic_dict["bridge"] = None
        elif nic_link and nic_link.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_LINK] = None
  
@@ -10163,7 +11502,10 @@ class LUInstanceSetParams(LogicalUnit):
  
    def ExpandNames(self):
      self._ExpandAndLockInstance()
+    # Can't even acquire node locks in shared mode as upcoming changes in
+    # Ganeti 2.6 will start to modify the node object on disk conversion
      self.needed_locks[locking.LEVEL_NODE] = []
+    self.needed_locks[locking.LEVEL_NODE_RES] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  
    def DeclareLocks(self, level):
@@ -10172,6 +11514,10 @@ class LUInstanceSetParams(LogicalUnit):
        if self.op.disk_template and self.op.remote_node:
          self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
          self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
+    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
+      # Copy node locks
+      self.needed_locks[locking.LEVEL_NODE_RES] = \
+        self.needed_locks[locking.LEVEL_NODE][:]
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -10180,14 +11526,16 @@ class LUInstanceSetParams(LogicalUnit):
  
      """
      args = dict()
-    if constants.BE_MEMORY in self.be_new:
-      args['memory'] = self.be_new[constants.BE_MEMORY]
+    if constants.BE_MINMEM in self.be_new:
+      args["minmem"] = self.be_new[constants.BE_MINMEM]
+    if constants.BE_MAXMEM in self.be_new:
+      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
      if constants.BE_VCPUS in self.be_new:
-      args['vcpus'] = self.be_new[constants.BE_VCPUS]
+      args["vcpus"] = self.be_new[constants.BE_VCPUS]
      # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
      # information at all.
      if self.op.nics:
-      args['nics'] = []
+      args["nics"] = []
        nic_override = dict(self.op.nics)
        for idx, nic in enumerate(self.instance.nics):
          if idx in nic_override:
@@ -10208,16 +11556,16 @@ class LUInstanceSetParams(LogicalUnit):
            nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
          mode = nicparams[constants.NIC_MODE]
          link = nicparams[constants.NIC_LINK]
-        args['nics'].append((ip, mac, mode, link))
+        args["nics"].append((ip, mac, mode, link))
        if constants.DDM_ADD in nic_override:
          ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
          mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
          nicparams = self.nic_pnew[constants.DDM_ADD]
          mode = nicparams[constants.NIC_MODE]
          link = nicparams[constants.NIC_LINK]
-        args['nics'].append((ip, mac, mode, link))
+        args["nics"].append((ip, mac, mode, link))
        elif constants.DDM_REMOVE in nic_override:
-        del args['nics'][-1]
+        del args["nics"][-1]
  
      env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
      if self.op.disk_template:
@@ -10246,6 +11594,8 @@ class LUInstanceSetParams(LogicalUnit):
        "Cannot retrieve locked instance %s" % self.op.instance_name
      pnode = instance.primary_node
      nodelist = list(instance.all_nodes)
+    pnode_info = self.cfg.GetNodeInfo(pnode)
+    self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
  
      # OS change
      if self.op.os_name and not self.op.force:
@@ -10266,7 +11616,8 @@ class LUInstanceSetParams(LogicalUnit):
                                     " %s to %s" % (instance.disk_template,
                                                    self.op.disk_template),
                                     errors.ECODE_INVAL)
-      _CheckInstanceDown(self, instance, "cannot change disk template")
+      _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                          msg="cannot change disk template")
        if self.op.disk_template in constants.DTS_INT_MIRROR:
          if self.op.remote_node == pnode:
            raise errors.OpPrereqError("Given new secondary node %s is the same"
@@ -10282,6 +11633,13 @@ class LUInstanceSetParams(LogicalUnit):
          required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
          _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
  
+        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
+        if pnode_info.group != snode_info.group:
+          self.LogWarning("The primary and secondary nodes are in two"
+                          " different node groups; the disk parameters"
+                          " from the first disk's node group will be"
+                          " used")
+
      # hvparams processing
      if self.op.hvparams:
        hv_type = instance.hypervisor
@@ -10292,23 +11650,54 @@ class LUInstanceSetParams(LogicalUnit):
        # local check
        hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
        _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
-      self.hv_new = hv_new # the new actual values
+      self.hv_proposed = self.hv_new = hv_new # the new actual values
        self.hv_inst = i_hvdict # the new dict (without defaults)
      else:
+      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
+                                              instance.hvparams)
        self.hv_new = self.hv_inst = {}
  
      # beparams processing
      if self.op.beparams:
        i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                     use_none=True)
+      objects.UpgradeBeParams(i_bedict)
        utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
        be_new = cluster.SimpleFillBE(i_bedict)
-      self.be_new = be_new # the new actual values
+      self.be_proposed = self.be_new = be_new # the new actual values
        self.be_inst = i_bedict # the new dict (without defaults)
      else:
        self.be_new = self.be_inst = {}
+      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
      be_old = cluster.FillBE(instance)
  
+    # CPU param validation -- checking every time a parameter is
+    # changed to cover all cases where either CPU mask or vcpus have
+    # changed
+    if (constants.BE_VCPUS in self.be_proposed and
+        constants.HV_CPU_MASK in self.hv_proposed):
+      cpu_list = \
+        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
+      # Verify mask is consistent with number of vCPUs. Can skip this
+      # test if only 1 entry in the CPU mask, which means same mask
+      # is applied to all vCPUs.
+      if (len(cpu_list) > 1 and
+          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
+        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
+                                   " CPU mask [%s]" %
+                                   (self.be_proposed[constants.BE_VCPUS],
+                                    self.hv_proposed[constants.HV_CPU_MASK]),
+                                   errors.ECODE_INVAL)
+
+      # Only perform this test if a new CPU mask is given
+      if constants.HV_CPU_MASK in self.hv_new:
+        # Calculate the largest CPU number requested
+        max_requested_cpu = max(map(max, cpu_list))
+        # Check that all of the instance's nodes have enough physical CPUs to
+        # satisfy the requested CPU mask
+        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
+                                max_requested_cpu + 1, instance.hypervisor)
+
      # osparams processing
      if self.op.osparams:
        i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
@@ -10319,8 +11708,9 @@ class LUInstanceSetParams(LogicalUnit):
  
      self.warn = []
  
-    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
-        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
+    #TODO(dynmem): do the appropriate check involving MINMEM
+    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
+        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
        mem_check_list = [pnode]
        if be_new[constants.BE_AUTO_BALANCE]:
          # either we changed auto_balance to yes or it was from before
@@ -10328,34 +11718,39 @@ class LUInstanceSetParams(LogicalUnit):
        instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                    instance.hypervisor)
        nodeinfo = self.rpc.call_node_info(mem_check_list, None,
-                                         instance.hypervisor)
+                                         [instance.hypervisor])
        pninfo = nodeinfo[pnode]
        msg = pninfo.fail_msg
        if msg:
          # Assume the primary node is unreachable and go ahead
          self.warn.append("Can't get info from primary node %s: %s" %
-                         (pnode,  msg))
-      elif not isinstance(pninfo.payload.get('memory_free', None), int):
-        self.warn.append("Node data from primary node %s doesn't contain"
-                         " free memory information" % pnode)
-      elif instance_info.fail_msg:
-        self.warn.append("Can't get instance runtime information: %s" %
-                        instance_info.fail_msg)
+                         (pnode, msg))
        else:
-        if instance_info.payload:
-          current_mem = int(instance_info.payload['memory'])
+        (_, _, (pnhvinfo, )) = pninfo.payload
+        if not isinstance(pnhvinfo.get("memory_free", None), int):
+          self.warn.append("Node data from primary node %s doesn't contain"
+                           " free memory information" % pnode)
+        elif instance_info.fail_msg:
+          self.warn.append("Can't get instance runtime information: %s" %
+                          instance_info.fail_msg)
          else:
-          # Assume instance not running
-          # (there is a slight race condition here, but it's not very probable,
-          # and we have no other way to check)
-          current_mem = 0
-        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
-                    pninfo.payload['memory_free'])
-        if miss_mem > 0:
-          raise errors.OpPrereqError("This change will prevent the instance"
-                                     " from starting, due to %d MB of memory"
-                                     " missing on its primary node" % miss_mem,
-                                     errors.ECODE_NORES)
+          if instance_info.payload:
+            current_mem = int(instance_info.payload["memory"])
+          else:
+            # Assume instance not running
+            # (there is a slight race condition here, but it's not very
+            # probable, and we have no other way to check)
+            # TODO: Describe race condition
+            current_mem = 0
+          #TODO(dynmem): do the appropriate check involving MINMEM
+          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
+                      pnhvinfo["memory_free"])
+          if miss_mem > 0:
+            raise errors.OpPrereqError("This change will prevent the instance"
+                                       " from starting, due to %d MB of memory"
+                                       " missing on its primary node" %
+                                       miss_mem,
+                                       errors.ECODE_NORES)
  
        if be_new[constants.BE_AUTO_BALANCE]:
          for node, nres in nodeinfo.items():
@@ -10363,11 +11758,13 @@ class LUInstanceSetParams(LogicalUnit):
              continue
            nres.Raise("Can't get info from secondary node %s" % node,
                       prereq=True, ecode=errors.ECODE_STATE)
-          if not isinstance(nres.payload.get('memory_free', None), int):
+          (_, _, (nhvinfo, )) = nres.payload
+          if not isinstance(nhvinfo.get("memory_free", None), int):
              raise errors.OpPrereqError("Secondary node %s didn't return free"
                                         " memory information" % node,
                                         errors.ECODE_STATE)
-          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
+          #TODO(dynmem): do the appropriate check involving MINMEM
+          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
              raise errors.OpPrereqError("This change will prevent the instance"
                                         " from failover to its secondary node"
                                         " %s, due to not enough memory" % node,
@@ -10403,8 +11800,8 @@ class LUInstanceSetParams(LogicalUnit):
                                   for key in constants.NICS_PARAMETERS
                                   if key in nic_dict])
  
-      if 'bridge' in nic_dict:
-        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
+      if "bridge" in nic_dict:
+        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
  
        new_nic_params = _GetUpdatedParams(old_nic_params,
                                           update_params_dict)
@@ -10430,12 +11827,12 @@ class LUInstanceSetParams(LogicalUnit):
          else:
            nic_ip = old_nic_ip
          if nic_ip is None:
-          raise errors.OpPrereqError('Cannot set the nic ip to None'
-                                     ' on a routed nic', errors.ECODE_INVAL)
+          raise errors.OpPrereqError("Cannot set the nic ip to None"
+                                     " on a routed nic", errors.ECODE_INVAL)
        if constants.INIC_MAC in nic_dict:
          nic_mac = nic_dict[constants.INIC_MAC]
          if nic_mac is None:
-          raise errors.OpPrereqError('Cannot set the nic mac to None',
+          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                       errors.ECODE_INVAL)
          elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
            # otherwise generate the mac
@@ -10460,7 +11857,8 @@ class LUInstanceSetParams(LogicalUnit):
          if len(instance.disks) == 1:
            raise errors.OpPrereqError("Cannot remove the last disk of"
                                       " an instance", errors.ECODE_INVAL)
-        _CheckInstanceDown(self, instance, "cannot remove disks")
+        _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                            msg="cannot remove disks")
  
        if (disk_op == constants.DDM_ADD and
            len(instance.disks) >= constants.MAX_DISKS):
@@ -10475,7 +11873,15 @@ class LUInstanceSetParams(LogicalUnit):
                                       (disk_op, len(instance.disks)),
                                       errors.ECODE_INVAL)
  
-    return
+    # disabling the instance
+    if self.op.offline_inst:
+      _CheckInstanceState(self, instance, INSTANCE_DOWN,
+                          msg="cannot change instance state to offline")
+
+    # enabling the instance
+    if self.op.online_inst:
+      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
+                          msg="cannot make instance go online")
  
    def _ConvertPlainToDrbd(self, feedback_fn):
      """Converts an instance from plain to drbd.
@@ -10486,13 +11892,16 @@ class LUInstanceSetParams(LogicalUnit):
      pnode = instance.primary_node
      snode = self.op.remote_node
  
+    assert instance.disk_template == constants.DT_PLAIN
+
      # create a fake disk info for _GenerateDiskTemplate
      disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                    constants.IDISK_VG: d.logical_id[0]}
                   for d in instance.disks]
      new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                        instance.name, pnode, [snode],
-                                      disk_info, None, None, 0, feedback_fn)
+                                      disk_info, None, None, 0, feedback_fn,
+                                      self.diskparams)
      info = _GetInstanceInfoText(instance)
      feedback_fn("Creating aditional volumes...")
      # first, create the missing data and meta devices
@@ -10522,6 +11931,9 @@ class LUInstanceSetParams(LogicalUnit):
      instance.disks = new_disks
      self.cfg.Update(instance, feedback_fn)
  
+    # Release node locks while waiting for sync
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+
      # disks are created, waiting for sync
      disk_abort = not _WaitForSync(self, instance,
                                    oneshot=not self.op.wait_for_sync)
@@ -10529,12 +11941,17 @@ class LUInstanceSetParams(LogicalUnit):
        raise errors.OpExecError("There are some degraded disks for"
                                 " this instance, please cleanup manually")
  
+    # Node resource locks will be released by caller
+
    def _ConvertDrbdToPlain(self, feedback_fn):
      """Converts an instance from drbd to plain.
  
      """
      instance = self.instance
+
      assert len(instance.secondary_nodes) == 1
+    assert instance.disk_template == constants.DT_DRBD8
+
      pnode = instance.primary_node
      snode = instance.secondary_nodes[0]
      feedback_fn("Converting template to plain")
@@ -10552,6 +11969,9 @@ class LUInstanceSetParams(LogicalUnit):
      instance.disk_template = constants.DT_PLAIN
      self.cfg.Update(instance, feedback_fn)
  
+    # Release locks in case removing disks takes a while
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+
      feedback_fn("Removing volumes on the secondary node...")
      for disk in old_disks:
        self.cfg.SetDiskID(disk, snode)
@@ -10569,6 +11989,13 @@ class LUInstanceSetParams(LogicalUnit):
          self.LogWarning("Could not remove metadata for disk %d on node %s,"
                          " continuing anyway: %s", idx, pnode, msg)
  
+    # these are DRBD disks, return their ports to the pool
+    for disk in old_disks:
+      tcp_port = disk.logical_id[2]
+      self.cfg.AddTcpUdpPort(tcp_port)
+
+    # Node resource locks will be released by caller
+
    def Exec(self, feedback_fn):
      """Modifies an instance.
  
@@ -10580,6 +12007,10 @@ class LUInstanceSetParams(LogicalUnit):
      for warn in self.warn:
        feedback_fn("WARNING: %s" % warn)
  
+    assert ((self.op.disk_template is None) ^
+            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
+      "Not owning any node resource locks"
+
      result = []
      instance = self.instance
      # disk changes
@@ -10595,6 +12026,11 @@ class LUInstanceSetParams(LogicalUnit):
              self.LogWarning("Could not remove disk/%d on node %s: %s,"
                              " continuing anyway", device_idx, node, msg)
          result.append(("disk/%d" % device_idx, "remove"))
+
+        # if this is a DRBD disk, return its port to the pool
+        if device.dev_type in constants.LDS_DRBD:
+          tcp_port = device.logical_id[2]
+          self.cfg.AddTcpUdpPort(tcp_port)
        elif disk_op == constants.DDM_ADD:
          # add a new disk
          if instance.disk_template in (constants.DT_FILE,
@@ -10611,7 +12047,9 @@ class LUInstanceSetParams(LogicalUnit):
                                           [disk_dict],
                                           file_path,
                                           file_driver,
-                                         disk_idx_base, feedback_fn)[0]
+                                         disk_idx_base,
+                                         feedback_fn,
+                                         self.diskparams)[0]
          instance.disks.append(new_disk)
          info = _GetInstanceInfoText(instance)
  
@@ -10637,6 +12075,16 @@ class LUInstanceSetParams(LogicalUnit):
                         disk_dict[constants.IDISK_MODE]))
  
      if self.op.disk_template:
+      if __debug__:
+        check_nodes = set(instance.all_nodes)
+        if self.op.remote_node:
+          check_nodes.add(self.op.remote_node)
+        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
+          owned = self.owned_locks(level)
+          assert not (check_nodes - owned), \
+            ("Not owning the correct locks, owning %r, expected at least %r" %
+             (owned, check_nodes))
+
        r_shut = _ShutdownInstanceDisks(self, instance)
        if not r_shut:
          raise errors.OpExecError("Cannot shutdown instance disks, unable to"
@@ -10649,6 +12097,15 @@ class LUInstanceSetParams(LogicalUnit):
          raise
        result.append(("disk_template", self.op.disk_template))
  
+      assert instance.disk_template == self.op.disk_template, \
+        ("Expected disk template '%s', found '%s'" %
+         (self.op.disk_template, instance.disk_template))
+
+    # Release node and resource locks if there are any (they might already have
+    # been released during disk conversion)
+    _ReleaseLocks(self, locking.LEVEL_NODE)
+    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
+
      # NIC changes
      for nic_op, nic_dict in self.op.nics:
        if nic_op == constants.DDM_REMOVE:
@@ -10699,8 +12156,20 @@ class LUInstanceSetParams(LogicalUnit):
        for key, val in self.op.osparams.iteritems():
          result.append(("os/%s" % key, val))
  
+    # online/offline instance
+    if self.op.online_inst:
+      self.cfg.MarkInstanceDown(instance.name)
+      result.append(("admin_state", constants.ADMINST_DOWN))
+    if self.op.offline_inst:
+      self.cfg.MarkInstanceOffline(instance.name)
+      result.append(("admin_state", constants.ADMINST_OFFLINE))
+
      self.cfg.Update(instance, feedback_fn)
  
+    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
+                self.owned_locks(locking.LEVEL_NODE)), \
+      "All node locks should have been released by now"
+
      return result
  
    _DISK_CONVERSIONS = {
@@ -10709,6 +12178,147 @@ class LUInstanceSetParams(LogicalUnit):
      }
  
  
+class LUInstanceChangeGroup(LogicalUnit):
+  HPATH = "instance-change-group"
+  HTYPE = constants.HTYPE_INSTANCE
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    self.share_locks = _ShareAll()
+    self.needed_locks = {
+      locking.LEVEL_NODEGROUP: [],
+      locking.LEVEL_NODE: [],
+      }
+
+    self._ExpandAndLockInstance()
+
+    if self.op.target_groups:
+      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
+                                  self.op.target_groups)
+    else:
+      self.req_target_uuids = None
+
+    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODEGROUP:
+      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
+
+      if self.req_target_uuids:
+        lock_groups = set(self.req_target_uuids)
+
+        # Lock all groups used by instance optimistically; this requires going
+        # via the node before it's locked, requiring verification later on
+        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
+        lock_groups.update(instance_groups)
+      else:
+        # No target groups, need to lock all of them
+        lock_groups = locking.ALL_SET
+
+      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
+
+    elif level == locking.LEVEL_NODE:
+      if self.req_target_uuids:
+        # Lock all nodes used by the instance
+        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+        self._LockInstancesNodes()
+
+        # Lock all nodes in all potential target groups
+        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
+                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
+        member_nodes = [node_name
+                        for group in lock_groups
+                        for node_name in self.cfg.GetNodeGroup(group).members]
+        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
+      else:
+        # Lock all nodes as all groups are potential targets
+        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+
+  def CheckPrereq(self):
+    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
+
+    assert (self.req_target_uuids is None or
+            owned_groups.issuperset(self.req_target_uuids))
+    assert owned_instances == set([self.op.instance_name])
+
+    # Get instance information
+    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+
+    # Check if nodes and node groups for locked instance are still correct
+    assert owned_nodes.issuperset(self.instance.all_nodes), \
+      ("Instance %s's nodes changed while we kept the lock" %
+       self.op.instance_name)
+
+    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
+                                           owned_groups)
+
+    if self.req_target_uuids:
+      # User requested specific target groups (as a set for the "&" below)
+      self.target_uuids = frozenset(self.req_target_uuids)
+    else:
+      # All groups except those used by the instance are potential targets
+      self.target_uuids = owned_groups - inst_groups
+
+    conflicting_groups = self.target_uuids & inst_groups
+    if conflicting_groups:
+      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
+                                 " used by the instance '%s'" %
+                                 (utils.CommaJoin(conflicting_groups),
+                                  self.op.instance_name),
+                                 errors.ECODE_INVAL)
+
+    if not self.target_uuids:
+      raise errors.OpPrereqError("There are no possible target groups",
+                                 errors.ECODE_INVAL)
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    """
+    assert self.target_uuids
+
+    env = {
+      "TARGET_GROUPS": " ".join(self.target_uuids),
+      }
+
+    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
+
+    return env
+
+  def BuildHooksNodes(self):
+    """Build hooks nodes.
+
+    """
+    mn = self.cfg.GetMasterNode()
+    return ([mn], [mn])
+
+  def Exec(self, feedback_fn):
+    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
+
+    assert instances == [self.op.instance_name], "Instance not locked"
+
+    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
+                     instances=instances, target_groups=list(self.target_uuids))
+
+    ial.Run(self.op.iallocator)
+
+    if not ial.success:
+      raise errors.OpPrereqError("Can't compute solution for changing group of"
+                                 " instance '%s' using iallocator '%s': %s" %
+                                 (self.op.instance_name, self.op.iallocator,
+                                  ial.info),
+                                 errors.ECODE_NORES)
+
+    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
+
+    self.LogInfo("Iallocator returned %s job(s) for changing group of"
+                 " instance '%s'", len(jobs), self.op.instance_name)
+
+    return ResultWithJobs(jobs)
+
+
  class LUBackupQuery(NoHooksLU):
    """Query the exports list
  
@@ -10733,7 +12343,7 @@ class LUBackupQuery(NoHooksLU):
          that node.
  
      """
-    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
+    self.nodes = self.owned_locks(locking.LEVEL_NODE)
      rpcresult = self.rpc.call_export_list(self.nodes)
      result = {}
      for node in rpcresult:
@@ -10882,7 +12492,8 @@ class LUBackupExport(LogicalUnit):
            "Cannot retrieve locked instance %s" % self.op.instance_name
      _CheckNodeOnline(self, self.instance.primary_node)
  
-    if (self.op.remove_instance and self.instance.admin_up and
+    if (self.op.remove_instance and
+        self.instance.admin_state == constants.ADMINST_UP and
          not self.op.shutdown):
        raise errors.OpPrereqError("Can not remove instance without shutting it"
                                   " down before")
@@ -11012,7 +12623,7 @@ class LUBackupExport(LogicalUnit):
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, src_node)
  
-    activate_disks = (not instance.admin_up)
+    activate_disks = (instance.admin_state != constants.ADMINST_UP)
  
      if activate_disks:
        # Activate the instance disks if we'exporting a stopped instance
@@ -11025,11 +12636,13 @@ class LUBackupExport(LogicalUnit):
  
        helper.CreateSnapshots()
        try:
-        if (self.op.shutdown and instance.admin_up and
+        if (self.op.shutdown and
+            instance.admin_state == constants.ADMINST_UP and
              not self.op.remove_instance):
            assert not activate_disks
            feedback_fn("Starting instance %s" % instance.name)
-          result = self.rpc.call_instance_start(src_node, instance, None, None)
+          result = self.rpc.call_instance_start(src_node,
+                                                (instance, None, None), False)
            msg = result.fail_msg
            if msg:
              feedback_fn("Failed to start instance: %s" % msg)
@@ -11115,7 +12728,7 @@ class LUBackupRemove(NoHooksLU):
        fqdn_warn = True
        instance_name = self.op.instance_name
  
-    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
+    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exportlist = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exportlist:
@@ -11173,6 +12786,19 @@ class LUGroupAdd(LogicalUnit):
      if self.op.ndparams:
        utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
  
+    if self.op.diskparams:
+      for templ in constants.DISK_TEMPLATES:
+        if templ not in self.op.diskparams:
+          self.op.diskparams[templ] = {}
+        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
+    else:
+      self.op.diskparams = self.cfg.GetClusterInfo().diskparams
+
+    if self.op.ipolicy:
+      cluster = self.cfg.GetClusterInfo()
+      full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
+      objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
+
    def BuildHooksEnv(self):
      """Build hooks env.
  
@@ -11195,7 +12821,9 @@ class LUGroupAdd(LogicalUnit):
      group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                    uuid=self.group_uuid,
                                    alloc_policy=self.op.alloc_policy,
-                                  ndparams=self.op.ndparams)
+                                  ndparams=self.op.ndparams,
+                                  diskparams=self.op.diskparams,
+                                  ipolicy=self.op.ipolicy)
  
      self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
      del self.remove_locks[locking.LEVEL_NODEGROUP]
@@ -11235,12 +12863,12 @@ class LUGroupAssignNodes(NoHooksLU):
  
      """
      assert self.needed_locks[locking.LEVEL_NODEGROUP]
-    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
+    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
              frozenset(self.op.nodes))
  
      expected_locks = (set([self.group_uuid]) |
                        self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
-    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
+    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
      if actual_locks != expected_locks:
        raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                                 " current groups are '%s', used to be '%s'" %
@@ -11280,13 +12908,9 @@ class LUGroupAssignNodes(NoHooksLU):
      """Assign nodes to a new group.
  
      """
-    for node in self.op.nodes:
-      self.node_data[node].group = self.group_uuid
-
-    # FIXME: Depends on side-effects of modifying the result of
-    # C{cfg.GetAllNodesInfo}
+    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
  
-    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
+    self.cfg.AssignGroupNodes(mods)
  
    @staticmethod
    def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
@@ -11345,6 +12969,7 @@ class _GroupQuery(_QueryBase):
      lu.needed_locks = {}
  
      self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
+    self._cluster = lu.cfg.GetClusterInfo()
      name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
  
      if not self.names:
@@ -11410,7 +13035,8 @@ class _GroupQuery(_QueryBase):
            # Do not pass on node information if it was not requested.
            group_to_nodes = None
  
-    return query.GroupQueryData([self._all_groups[uuid]
+    return query.GroupQueryData(self._cluster,
+                                [self._all_groups[uuid]
                                   for uuid in self.wanted],
                                  group_to_nodes, group_to_instances)
  
@@ -11428,6 +13054,9 @@ class LUGroupQuery(NoHooksLU):
    def ExpandNames(self):
      self.gq.ExpandNames(self)
  
+  def DeclareLocks(self, level):
+    self.gq.DeclareLocks(self, level)
+
    def Exec(self, feedback_fn):
      return self.gq.OldStyleQuery(self)
  
@@ -11443,7 +13072,11 @@ class LUGroupSetParams(LogicalUnit):
    def CheckArguments(self):
      all_changes = [
        self.op.ndparams,
+      self.op.diskparams,
        self.op.alloc_policy,
+      self.op.hv_state,
+      self.op.disk_state,
+      self.op.ipolicy,
        ]
  
      if all_changes.count(None) == len(all_changes):
@@ -11473,6 +13106,35 @@ class LUGroupSetParams(LogicalUnit):
        utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
        self.new_ndparams = new_ndparams
  
+    if self.op.diskparams:
+      self.new_diskparams = dict()
+      for templ in constants.DISK_TEMPLATES:
+        if templ not in self.op.diskparams:
+          self.op.diskparams[templ] = {}
+        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
+                                             self.op.diskparams[templ])
+        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
+        self.new_diskparams[templ] = new_templ_params
+
+    if self.op.hv_state:
+      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
+                                                 self.group.hv_state_static)
+
+    if self.op.disk_state:
+      self.new_disk_state = \
+        _MergeAndVerifyDiskState(self.op.disk_state,
+                                 self.group.disk_state_static)
+
+    if self.op.ipolicy:
+      g_ipolicy = {}
+      for key, value in self.op.ipolicy.iteritems():
+        g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
+                                           value,
+                                           use_none=True)
+        utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
+      self.new_ipolicy = g_ipolicy
+      objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
+
    def BuildHooksEnv(self):
      """Build hooks env.
  
@@ -11499,14 +13161,26 @@ class LUGroupSetParams(LogicalUnit):
        self.group.ndparams = self.new_ndparams
        result.append(("ndparams", str(self.group.ndparams)))
  
+    if self.op.diskparams:
+      self.group.diskparams = self.new_diskparams
+      result.append(("diskparams", str(self.group.diskparams)))
+
      if self.op.alloc_policy:
        self.group.alloc_policy = self.op.alloc_policy
  
+    if self.op.hv_state:
+      self.group.hv_state_static = self.new_hv_state
+
+    if self.op.disk_state:
+      self.group.disk_state_static = self.new_disk_state
+
+    if self.op.ipolicy:
+      self.group.ipolicy = self.new_ipolicy
+
      self.cfg.Update(self.group, feedback_fn)
      return result
  
  
-
  class LUGroupRemove(LogicalUnit):
    HPATH = "group-remove"
    HTYPE = constants.HTYPE_GROUP
@@ -11643,7 +13317,163 @@ class LUGroupRename(LogicalUnit):
      return self.op.new_name
  
  
-class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
+class LUGroupEvacuate(LogicalUnit):
+  HPATH = "group-evacuate"
+  HTYPE = constants.HTYPE_GROUP
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    # This raises errors.OpPrereqError on its own:
+    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
+
+    if self.op.target_groups:
+      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
+                                  self.op.target_groups)
+    else:
+      self.req_target_uuids = []
+
+    if self.group_uuid in self.req_target_uuids:
+      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
+                                 " as a target group (targets are %s)" %
+                                 (self.group_uuid,
+                                  utils.CommaJoin(self.req_target_uuids)),
+                                 errors.ECODE_INVAL)
+
+    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
+
+    self.share_locks = _ShareAll()
+    self.needed_locks = {
+      locking.LEVEL_INSTANCE: [],
+      locking.LEVEL_NODEGROUP: [],
+      locking.LEVEL_NODE: [],
+      }
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_INSTANCE:
+      assert not self.needed_locks[locking.LEVEL_INSTANCE]
+
+      # Lock instances optimistically, needs verification once node and group
+      # locks have been acquired
+      self.needed_locks[locking.LEVEL_INSTANCE] = \
+        self.cfg.GetNodeGroupInstances(self.group_uuid)
+
+    elif level == locking.LEVEL_NODEGROUP:
+      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
+
+      if self.req_target_uuids:
+        lock_groups = set([self.group_uuid] + self.req_target_uuids)
+
+        # Lock all groups used by instances optimistically; this requires going
+        # via the node before it's locked, requiring verification later on
+        lock_groups.update(group_uuid
+                           for instance_name in
+                             self.owned_locks(locking.LEVEL_INSTANCE)
+                           for group_uuid in
+                             self.cfg.GetInstanceNodeGroups(instance_name))
+      else:
+        # No target groups, need to lock all of them
+        lock_groups = locking.ALL_SET
+
+      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
+
+    elif level == locking.LEVEL_NODE:
+      # This will only lock the nodes in the group to be evacuated which
+      # contain actual instances
+      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+      self._LockInstancesNodes()
+
+      # Lock all nodes in group to be evacuated and target groups
+      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+      assert self.group_uuid in owned_groups
+      member_nodes = [node_name
+                      for group in owned_groups
+                      for node_name in self.cfg.GetNodeGroup(group).members]
+      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
+
+  def CheckPrereq(self):
+    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
+
+    assert owned_groups.issuperset(self.req_target_uuids)
+    assert self.group_uuid in owned_groups
+
+    # Check if locked instances are still correct
+    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
+    # Get instance information
+    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
+
+    # Check if node groups for locked instances are still correct
+    for instance_name in owned_instances:
+      inst = self.instances[instance_name]
+      assert owned_nodes.issuperset(inst.all_nodes), \
+        "Instance %s's nodes changed while we kept the lock" % instance_name
+
+      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
+                                             owned_groups)
+
+      assert self.group_uuid in inst_groups, \
+        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
+
+    if self.req_target_uuids:
+      # User requested specific target groups
+      self.target_uuids = self.req_target_uuids
+    else:
+      # All groups except the one to be evacuated are potential targets
+      self.target_uuids = [group_uuid for group_uuid in owned_groups
+                           if group_uuid != self.group_uuid]
+
+      if not self.target_uuids:
+        raise errors.OpPrereqError("There are no possible target groups",
+                                   errors.ECODE_INVAL)
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    """
+    return {
+      "GROUP_NAME": self.op.group_name,
+      "TARGET_GROUPS": " ".join(self.target_uuids),
+      }
+
+  def BuildHooksNodes(self):
+    """Build hooks nodes.
+
+    """
+    mn = self.cfg.GetMasterNode()
+
+    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
+
+    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
+
+    return (run_nodes, run_nodes)
+
+  def Exec(self, feedback_fn):
+    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
+
+    assert self.group_uuid not in self.target_uuids
+
+    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
+                     instances=instances, target_groups=self.target_uuids)
+
+    ial.Run(self.op.iallocator)
+
+    if not ial.success:
+      raise errors.OpPrereqError("Can't compute group evacuation using"
+                                 " iallocator '%s': %s" %
+                                 (self.op.iallocator, ial.info),
+                                 errors.ECODE_NORES)
+
+    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
+
+    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
+                 len(jobs), self.op.group_name)
+
+    return ResultWithJobs(jobs)
+
+
+class TagsLU(NoHooksLU): # pylint: disable=W0223
    """Generic tags LU.
  
    This is an abstract class which is the parent of all the other tags LUs.
@@ -11691,7 +13521,7 @@ class LUTagsGet(TagsLU):
      TagsLU.ExpandNames(self)
  
      # Share locks as this is only a read operation
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+    self.share_locks = _ShareAll()
  
    def Exec(self, feedback_fn):
      """Returns the tag list.
@@ -11903,7 +13733,7 @@ class LUTestJqueue(NoHooksLU):
      # Wait for client to close
      try:
        try:
-        # pylint: disable-msg=E1101
+        # pylint: disable=E1101
          # Instance of '_socketobject' has no ... member
          conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
          conn.recv(1)
@@ -12000,12 +13830,12 @@ class IAllocator(object):
        easy usage
  
    """
-  # pylint: disable-msg=R0902
+  # pylint: disable=R0902
    # lots of instance attributes
  
-  def __init__(self, cfg, rpc, mode, **kwargs):
+  def __init__(self, cfg, rpc_runner, mode, **kwargs):
      self.cfg = cfg
-    self.rpc = rpc
+    self.rpc = rpc_runner
      # init buffer variables
      self.in_text = self.out_text = self.in_data = self.out_data = None
      # init all input fields so that pylint is happy
@@ -12015,9 +13845,8 @@ class IAllocator(object):
      self.hypervisor = None
      self.relocate_from = None
      self.name = None
-    self.evac_nodes = None
      self.instances = None
-    self.reloc_mode = None
+    self.evac_mode = None
      self.target_groups = []
      # computed fields
      self.required_nodes = None
@@ -12071,12 +13900,11 @@ class IAllocator(object):
        hypervisor_name = self.hypervisor
      elif self.mode == constants.IALLOCATOR_MODE_RELOC:
        hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
-    elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
-                       constants.IALLOCATOR_MODE_MRELOC):
-      hypervisor_name = cluster_info.enabled_hypervisors[0]
+    else:
+      hypervisor_name = cluster_info.primary_hypervisor
  
-    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
-                                        hypervisor_name)
+    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
+                                        [hypervisor_name])
      node_iinfo = \
        self.rpc.call_all_instances_info(node_list,
                                         cluster_info.enabled_hypervisors)
@@ -12138,6 +13966,7 @@ class IAllocator(object):
      @param node_results: the basic node structures as filled from the config
  
      """
+    # TODO(dynmem): compute the right data for MAX and MIN memory
      # make a copy of the current dict
      node_results = dict(node_results)
      for nname, nresult in node_data.items():
@@ -12148,10 +13977,10 @@ class IAllocator(object):
          nresult.Raise("Can't get data for node %s" % nname)
          node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                  nname)
-        remote_info = nresult.payload
+        remote_info = _MakeLegacyNodeInfo(nresult.payload)
  
-        for attr in ['memory_total', 'memory_free', 'memory_dom0',
-                     'vg_size', 'vg_free', 'cpu_total']:
+        for attr in ["memory_total", "memory_free", "memory_dom0",
+                     "vg_size", "vg_free", "cpu_total"]:
            if attr not in remote_info:
              raise errors.OpExecError("Node '%s' didn't return attribute"
                                       " '%s'" % (nname, attr))
@@ -12163,25 +13992,25 @@ class IAllocator(object):
          i_p_mem = i_p_up_mem = 0
          for iinfo, beinfo in i_list:
            if iinfo.primary_node == nname:
-            i_p_mem += beinfo[constants.BE_MEMORY]
+            i_p_mem += beinfo[constants.BE_MAXMEM]
              if iinfo.name not in node_iinfo[nname].payload:
                i_used_mem = 0
              else:
-              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
-            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
-            remote_info['memory_free'] -= max(0, i_mem_diff)
+              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
+            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
+            remote_info["memory_free"] -= max(0, i_mem_diff)
  
-            if iinfo.admin_up:
-              i_p_up_mem += beinfo[constants.BE_MEMORY]
+            if iinfo.admin_state == constants.ADMINST_UP:
+              i_p_up_mem += beinfo[constants.BE_MAXMEM]
  
          # compute memory used by instances
          pnr_dyn = {
-          "total_memory": remote_info['memory_total'],
-          "reserved_memory": remote_info['memory_dom0'],
-          "free_memory": remote_info['memory_free'],
-          "total_disk": remote_info['vg_size'],
-          "free_disk": remote_info['vg_free'],
-          "total_cpus": remote_info['cpu_total'],
+          "total_memory": remote_info["memory_total"],
+          "reserved_memory": remote_info["memory_dom0"],
+          "free_memory": remote_info["memory_free"],
+          "total_disk": remote_info["vg_size"],
+          "free_disk": remote_info["vg_free"],
+          "total_cpus": remote_info["cpu_total"],
            "i_pri_memory": i_p_mem,
            "i_pri_up_memory": i_p_up_mem,
            }
@@ -12211,9 +14040,9 @@ class IAllocator(object):
          nic_data.append(nic_dict)
        pir = {
          "tags": list(iinfo.GetTags()),
-        "admin_up": iinfo.admin_up,
+        "admin_state": iinfo.admin_state,
          "vcpus": beinfo[constants.BE_VCPUS],
-        "memory": beinfo[constants.BE_MEMORY],
+        "memory": beinfo[constants.BE_MAXMEM],
          "os": iinfo.os,
          "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
          "nics": nic_data,
@@ -12298,22 +14127,21 @@ class IAllocator(object):
        }
      return request
  
-  def _AddEvacuateNodes(self):
-    """Add evacuate nodes data to allocator structure.
+  def _AddNodeEvacuate(self):
+    """Get data for node-evacuate requests.
  
      """
-    request = {
-      "evac_nodes": self.evac_nodes
+    return {
+      "instances": self.instances,
+      "evac_mode": self.evac_mode,
        }
-    return request
  
-  def _AddMultiRelocate(self):
-    """Get data for multi-relocate requests.
+  def _AddChangeGroup(self):
+    """Get data for change-group requests.
  
      """
      return {
        "instances": self.instances,
-      "reloc_mode": self.reloc_mode,
        "target_groups": self.target_groups,
        }
  
@@ -12339,6 +14167,28 @@ class IAllocator(object):
      self.in_text = serializer.Dump(self.in_data)
  
    _STRING_LIST = ht.TListOf(ht.TString)
+  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
+     # pylint: disable=E1101
+     # Class '...' has no 'OP_ID' member
+     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
+                          opcodes.OpInstanceMigrate.OP_ID,
+                          opcodes.OpInstanceReplaceDisks.OP_ID])
+     })))
+
+  _NEVAC_MOVED = \
+    ht.TListOf(ht.TAnd(ht.TIsLength(3),
+                       ht.TItems([ht.TNonEmptyString,
+                                  ht.TNonEmptyString,
+                                  ht.TListOf(ht.TNonEmptyString),
+                                 ])))
+  _NEVAC_FAILED = \
+    ht.TListOf(ht.TAnd(ht.TIsLength(2),
+                       ht.TItems([ht.TNonEmptyString,
+                                  ht.TMaybeString,
+                                 ])))
+  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
+                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
+
    _MODE_DATA = {
      constants.IALLOCATOR_MODE_ALLOC:
        (_AddNewInstance,
@@ -12357,22 +14207,16 @@ class IAllocator(object):
        (_AddRelocateInstance,
         [("name", ht.TString), ("relocate_from", _STRING_LIST)],
         ht.TList),
-    constants.IALLOCATOR_MODE_MEVAC:
-      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
-       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
-    constants.IALLOCATOR_MODE_MRELOC:
-      (_AddMultiRelocate, [
+     constants.IALLOCATOR_MODE_NODE_EVAC:
+      (_AddNodeEvacuate, [
+        ("instances", _STRING_LIST),
+        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
+        ], _NEVAC_RESULT),
+     constants.IALLOCATOR_MODE_CHG_GROUP:
+      (_AddChangeGroup, [
          ("instances", _STRING_LIST),
-        ("reloc_mode", ht.TElemOf(constants.IALLOCATOR_MRELOC_MODES)),
          ("target_groups", _STRING_LIST),
-        ],
-       ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
-         # pylint: disable-msg=E1101
-         # Class '...' has no 'OP_ID' member
-         "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
-                              opcodes.OpInstanceMigrate.OP_ID,
-                              opcodes.OpInstanceReplaceDisks.OP_ID])
-         })))),
+        ], _NEVAC_RESULT),
      }
  
    def Run(self, name, validate=True, call_fn=None):
@@ -12421,39 +14265,28 @@ class IAllocator(object):
                                 (self._result_check, self.result),
                                 errors.ECODE_INVAL)
  
-    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
-                     constants.IALLOCATOR_MODE_MEVAC):
+    if self.mode == constants.IALLOCATOR_MODE_RELOC:
+      assert self.relocate_from is not None
+      assert self.required_nodes == 1
+
        node2group = dict((name, ndata["group"])
                          for (name, ndata) in self.in_data["nodes"].items())
  
        fn = compat.partial(self._NodesToGroups, node2group,
                            self.in_data["nodegroups"])
  
-      if self.mode == constants.IALLOCATOR_MODE_RELOC:
-        assert self.relocate_from is not None
-        assert self.required_nodes == 1
-
-        request_groups = fn(self.relocate_from)
-        result_groups = fn(rdict["result"])
-
-        if result_groups != request_groups:
-          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
-                                   " differ from original groups (%s)" %
-                                   (utils.CommaJoin(result_groups),
-                                    utils.CommaJoin(request_groups)))
-      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
-        request_groups = fn(self.evac_nodes)
-        for (instance_name, secnode) in self.result:
-          result_groups = fn([secnode])
-          if result_groups != request_groups:
-            raise errors.OpExecError("Iallocator returned new secondary node"
-                                     " '%s' (group '%s') for instance '%s'"
-                                     " which is not in original group '%s'" %
-                                     (secnode, utils.CommaJoin(result_groups),
-                                      instance_name,
-                                      utils.CommaJoin(request_groups)))
-      else:
-        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
+      instance = self.cfg.GetInstanceInfo(self.name)
+      request_groups = fn(self.relocate_from + [instance.primary_node])
+      result_groups = fn(rdict["result"] + [instance.primary_node])
+
+      if self.success and not set(result_groups).issubset(request_groups):
+        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
+                                 " differ from original groups (%s)" %
+                                 (utils.CommaJoin(result_groups),
+                                  utils.CommaJoin(request_groups)))
+
+    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
+      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
  
      self.out_data = rdict
  
@@ -12532,17 +14365,13 @@ class LUTestAllocator(NoHooksLU):
      elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
        fname = _ExpandInstanceName(self.cfg, self.op.name)
        self.op.name = fname
-      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
-    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
-      if not hasattr(self.op, "evac_nodes"):
-        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
-                                   " opcode input", errors.ECODE_INVAL)
-    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
-      if self.op.instances:
-        self.op.instances = _GetWantedInstances(self, self.op.instances)
-      else:
-        raise errors.OpPrereqError("Missing instances to relocate",
-                                   errors.ECODE_INVAL)
+      self.relocate_from = \
+          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
+    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
+                          constants.IALLOCATOR_MODE_NODE_EVAC):
+      if not self.op.instances:
+        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
+      self.op.instances = _GetWantedInstances(self, self.op.instances)
      else:
        raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                   self.op.mode, errors.ECODE_INVAL)
@@ -12578,16 +14407,16 @@ class LUTestAllocator(NoHooksLU):
                         name=self.op.name,
                         relocate_from=list(self.relocate_from),
                         )
-    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
+    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
        ial = IAllocator(self.cfg, self.rpc,
                         mode=self.op.mode,
-                       evac_nodes=self.op.evac_nodes)
-    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
+                       instances=self.op.instances,
+                       target_groups=self.op.target_groups)
+    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
        ial = IAllocator(self.cfg, self.rpc,
                         mode=self.op.mode,
                         instances=self.op.instances,
-                       reloc_mode=self.op.reloc_mode,
-                       target_groups=self.op.target_groups)
+                       evac_mode=self.op.evac_mode)
      else:
        raise errors.ProgrammerError("Uncatched mode %s in"
                                     " LUTestAllocator.Exec", self.op.mode)