Add gnt-instance start --pause
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 0a1beb8..8b9fa17 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -57,6 +57,7 @@ from ganeti import netutils
 from ganeti import query
 from ganeti import qlang
 from ganeti import opcodes
+from ganeti import ht
 
 import ganeti.masterd.instance # pylint: disable-msg=W0611
 
@@ -860,7 +861,7 @@ def _ExpandInstanceName(cfg, name):
 
 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                           memory, vcpus, nics, disk_template, disks,
-                          bep, hvp, hypervisor_name):
+                          bep, hvp, hypervisor_name, tags):
   """Builds instance related env variables for hooks
 
   This builds the hook environment from individual variables.
@@ -892,6 +893,8 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   @param hvp: the hypervisor parameters for the instance
   @type hypervisor_name: string
   @param hypervisor_name: the hypervisor for the instance
+  @type tags: list
+  @param tags: list of instance tags as strings
   @rtype: dict
   @return: the hook environment for this instance
 
@@ -939,6 +942,11 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 
   env["INSTANCE_DISK_COUNT"] = disk_count
 
+  if not tags:
+    tags = []
+
+  env["INSTANCE_TAGS"] = " ".join(tags)
+
   for source, kind in [(bep, "BE"), (hvp, "HV")]:
     for key, value in source.items():
       env["INSTANCE_%s_%s" % (kind, key)] = value
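With the hunk above, instance hooks receive a new INSTANCE_TAGS variable holding the tags as one space-separated string. A hypothetical hook-script body could consume it like this (variable names as built by _BuildInstanceHookEnv; the hooks runner may still add its own prefix, and the "no-backup" tag is invented for this sketch):

import os

# Hypothetical hook body: skip backup setup when the instance carries a
# "no-backup" tag.
tags = os.environ.get("INSTANCE_TAGS", "").split()
if "no-backup" in tags:
  print("skipping backup setup for %s" % os.environ.get("INSTANCE_NAME", "?"))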
@@ -1002,6 +1010,7 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
     'bep': bep,
     'hvp': hvp,
     'hypervisor_name': instance.hypervisor,
+    'tags': instance.tags,
   }
   if override:
     args.update(override)
@@ -1153,7 +1162,7 @@ def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
   iallocator = getattr(lu.op, iallocator_slot, None)
 
   if node is not None and iallocator is not None:
-    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
+    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                                errors.ECODE_INVAL)
   elif node is None and iallocator is None:
     default_iallocator = lu.cfg.GetDefaultIAllocator()
@@ -1161,10 +1170,10 @@ def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
       setattr(lu.op, iallocator_slot, default_iallocator)
     else:
       raise errors.OpPrereqError("No iallocator or node given and no"
-                                 " cluster-wide default iallocator found."
-                                 " Please specify either an iallocator or a"
+                                 " cluster-wide default iallocator found;"
+                                 " please specify either an iallocator or a"
                                  " node, or set a cluster-wide default"
-                                 " iallocator.")
+                                 " iallocator")
 
 
 class LUClusterPostInit(LogicalUnit):
@@ -1253,7 +1262,7 @@ class LUClusterDestroy(LogicalUnit):
 
 
 def _VerifyCertificate(filename):
-  """Verifies a certificate for LUClusterVerify.
+  """Verifies a certificate for L{LUClusterVerifyConfig}.
 
   @type filename: string
   @param filename: Path to PEM file
@@ -1263,7 +1272,7 @@ def _VerifyCertificate(filename):
     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            utils.ReadFile(filename))
   except Exception, err: # pylint: disable-msg=W0703
-    return (LUClusterVerify.ETYPE_ERROR,
+    return (LUClusterVerifyConfig.ETYPE_ERROR,
             "Failed to load X509 certificate %s: %s" % (filename, err))
 
   (errcode, msg) = \
@@ -1278,21 +1287,52 @@ def _VerifyCertificate(filename):
   if errcode is None:
     return (None, fnamemsg)
   elif errcode == utils.CERT_WARNING:
-    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
+    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
   elif errcode == utils.CERT_ERROR:
-    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
+    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
 
   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
 
 
-class LUClusterVerify(LogicalUnit):
-  """Verifies the cluster status.
+def _GetAllHypervisorParameters(cluster, instances):
+  """Compute the set of all hypervisor parameters.
+
+  @type cluster: L{objects.Cluster}
+  @param cluster: the cluster object
+  @type instances: list of L{objects.Instance}
+  @param instances: additional instances from which to obtain parameters
+  @rtype: list of (origin, hypervisor, parameters)
+  @return: a list with all parameters found, indicating the hypervisor they
+       apply to, and the origin (can be "cluster", "os X", or "instance Y")
 
   """
-  HPATH = "cluster-verify"
-  HTYPE = constants.HTYPE_CLUSTER
-  REQ_BGL = False
+  hvp_data = []
+
+  for hv_name in cluster.enabled_hypervisors:
+    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
+
+  for os_name, os_hvp in cluster.os_hvp.items():
+    for hv_name, hv_params in os_hvp.items():
+      if hv_params:
+        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
+        hvp_data.append(("os %s" % os_name, hv_name, full_params))
 
+  # TODO: collapse identical parameter values in a single one
+  for instance in instances:
+    if instance.hvparams:
+      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
+                       cluster.FillHV(instance)))
+
+  return hvp_data
+
+
+class _VerifyErrors(object):
+  """Mix-in for cluster/group verify LUs.
+
+  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
+  self.op and self._feedback_fn to be available.)
+
+  """
   TCLUSTER = "cluster"
   TNODE = "node"
   TINSTANCE = "instance"
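For orientation, the new _GetAllHypervisorParameters helper returns (origin, hypervisor, parameters) triples; a sketch of the shape with invented hypervisors, OS and parameter values:

# Illustrative shape of the return value only; all values are invented.
hvp_data = [
  ("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-2.6-xenU"}),
  ("os debian-image", "xen-pvm", {"kernel_path": "/boot/vmlinuz-custom"}),
  ("instance web1.example.com", "kvm", {"acpi": True}),
  ]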
@@ -1300,6 +1340,8 @@ class LUClusterVerify(LogicalUnit):
   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
+  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
+  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
@@ -1329,6 +1371,140 @@ class LUClusterVerify(LogicalUnit):
   ETYPE_ERROR = "ERROR"
   ETYPE_WARNING = "WARNING"
 
+  def _Error(self, ecode, item, msg, *args, **kwargs):
+    """Format an error message.
+
+    Based on the opcode's error_codes parameter, either format a
+    parseable error code, or a simpler error string.
+
+    This must be called only from Exec and functions called from Exec.
+
+    """
+    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
+    itype, etxt = ecode
+    # first complete the msg
+    if args:
+      msg = msg % args
+    # then format the whole message
+    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
+      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
+    else:
+      if item:
+        item = " " + item
+      else:
+        item = ""
+      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
+    # and finally report it via the feedback_fn
+    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
+
+  def _ErrorIf(self, cond, *args, **kwargs):
+    """Log an error message if the passed condition is True.
+
+    """
+    cond = (bool(cond)
+            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
+    if cond:
+      self._Error(*args, **kwargs)
+    # do not mark the operation as failed for WARN cases only
+    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
+      self.bad = self.bad or cond
+
+
+class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
+  """Verifies the cluster config.
+
+  """
+  REQ_BGL = True
+
+  def _VerifyHVP(self, hvp_data):
+    """Verifies locally the syntax of the hypervisor parameters.
+
+    """
+    for item, hv_name, hv_params in hvp_data:
+      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
+             (hv_name, item))
+      try:
+        hv_class = hypervisor.GetHypervisor(hv_name)
+        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
+        hv_class.CheckParameterSyntax(hv_params)
+      except errors.GenericError, err:
+        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
+
+  def ExpandNames(self):
+    # Information can be safely retrieved as the BGL is acquired in exclusive
+    # mode
+    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
+    self.all_node_info = self.cfg.GetAllNodesInfo()
+    self.all_inst_info = self.cfg.GetAllInstancesInfo()
+    self.needed_locks = {}
+
+  def Exec(self, feedback_fn):
+    """Verify integrity of the cluster config.
+
+    """
+    self.bad = False
+    self._feedback_fn = feedback_fn
+
+    feedback_fn("* Verifying cluster config")
+
+    for msg in self.cfg.VerifyConfig():
+      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
+
+    feedback_fn("* Verifying cluster certificate files")
+
+    for cert_filename in constants.ALL_CERT_FILES:
+      (errcode, msg) = _VerifyCertificate(cert_filename)
+      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
+
+    feedback_fn("* Verifying hypervisor parameters")
+
+    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
+                                                self.all_inst_info.values()))
+
+    feedback_fn("* Verifying all nodes belong to an existing group")
+
+    # We do this verification here because, should this bogus circumstance
+    # occur, it would never be caught by VerifyGroup, which only acts on
+    # nodes/instances reachable from existing node groups.
+
+    dangling_nodes = set(node.name for node in self.all_node_info.values()
+                         if node.group not in self.all_group_info)
+
+    dangling_instances = {}
+    no_node_instances = []
+
+    for inst in self.all_inst_info.values():
+      if inst.primary_node in dangling_nodes:
+        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
+      elif inst.primary_node not in self.all_node_info:
+        no_node_instances.append(inst.name)
+
+    pretty_dangling = [
+        "%s (%s)" %
+        (node,
+         utils.CommaJoin(dangling_instances.get(node,
+                                                ["no instances"])))
+        for node in dangling_nodes]
+
+    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
+                  "the following nodes (and their instances) belong to a"
+                  " non-existing group: %s", utils.CommaJoin(pretty_dangling))
+
+    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
+                  "the following instances have a non-existing primary node:"
+                  " %s", utils.CommaJoin(no_node_instances))
+
+    return (not self.bad, [g.name for g in self.all_group_info.values()])
+
+
+class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
+  """Verifies the status of a node group.
+
+  """
+  HPATH = "cluster-verify"
+  HTYPE = constants.HTYPE_CLUSTER
+  REQ_BGL = False
+
   _HOOKS_INDENT_RE = re.compile("^", re.M)
 
   class NodeImage(object):
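The _Error helper in the mix-in above produces either a machine-parseable or a human-readable message, depending on the opcode's error_codes flag; a quick illustration with invented field values:

ltype, etxt, itype, item, msg = \
  ("ERROR", "ENODEHOOKS", "node", "node1.example.com", "hook failed")
print("%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg))  # --error-codes form
print("%s: %s %s: %s" % (ltype, itype, item, msg))         # plain form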
@@ -1382,48 +1558,87 @@ class LUClusterVerify(LogicalUnit):
       self.oslist = {}
 
   def ExpandNames(self):
+    # This raises errors.OpPrereqError on its own:
+    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
+
+    # Get instances in node group; this is unsafe and needs verification later
+    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
+
     self.needed_locks = {
-      locking.LEVEL_NODE: locking.ALL_SET,
-      locking.LEVEL_INSTANCE: locking.ALL_SET,
-    }
+      locking.LEVEL_INSTANCE: inst_names,
+      locking.LEVEL_NODEGROUP: [self.group_uuid],
+      locking.LEVEL_NODE: [],
+      }
+
     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
 
-  def _Error(self, ecode, item, msg, *args, **kwargs):
-    """Format an error message.
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODE:
+      # Get members of node group; this is unsafe and needs verification later
+      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
 
-    Based on the opcode's error_codes parameter, either format a
-    parseable error code, or a simpler error string.
+      all_inst_info = self.cfg.GetAllInstancesInfo()
 
-    This must be called only from Exec and functions called from Exec.
+      # In Exec(), we warn about mirrored instances that have primary and
+      # secondary living in separate node groups. To fully verify that
+      # volumes for these instances are healthy, we will need to do an
+      # extra call to their secondaries. We ensure here those nodes will
+      # be locked.
+      for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
+        # Important: access only the instances whose lock is owned
+        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
+          nodes.update(all_inst_info[inst].secondary_nodes)
 
-    """
-    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
-    itype, etxt = ecode
-    # first complete the msg
-    if args:
-      msg = msg % args
-    # then format the whole message
-    if self.op.error_codes:
-      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
-    else:
-      if item:
-        item = " " + item
-      else:
-        item = ""
-      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
-    # and finally report it via the feedback_fn
-    self._feedback_fn("  - %s" % msg)
+      self.needed_locks[locking.LEVEL_NODE] = nodes
 
-  def _ErrorIf(self, cond, *args, **kwargs):
-    """Log an error message if the passed condition is True.
+  def CheckPrereq(self):
+    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
+    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
 
-    """
-    cond = bool(cond) or self.op.debug_simulate_errors
-    if cond:
-      self._Error(*args, **kwargs)
-    # do not mark the operation as failed for WARN cases only
-    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
-      self.bad = self.bad or cond
+    unlocked_nodes = \
+        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
+
+    unlocked_instances = \
+        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
+
+    if unlocked_nodes:
+      raise errors.OpPrereqError("Missing lock for nodes: %s" %
+                                 utils.CommaJoin(unlocked_nodes))
+
+    if unlocked_instances:
+      raise errors.OpPrereqError("Missing lock for instances: %s" %
+                                 utils.CommaJoin(unlocked_instances))
+
+    self.all_node_info = self.cfg.GetAllNodesInfo()
+    self.all_inst_info = self.cfg.GetAllInstancesInfo()
+
+    self.my_node_names = utils.NiceSort(group_nodes)
+    self.my_inst_names = utils.NiceSort(group_instances)
+
+    self.my_node_info = dict((name, self.all_node_info[name])
+                             for name in self.my_node_names)
+
+    self.my_inst_info = dict((name, self.all_inst_info[name])
+                             for name in self.my_inst_names)
+
+    # We detect here the nodes that will need the extra RPC calls for verifying
+    # split LV volumes; they should be locked.
+    extra_lv_nodes = set()
+
+    for inst in self.my_inst_info.values():
+      if inst.disk_template in constants.DTS_INT_MIRROR:
+        group = self.my_node_info[inst.primary_node].group
+        for nname in inst.secondary_nodes:
+          if self.all_node_info[nname].group != group:
+            extra_lv_nodes.add(nname)
+
+    unlocked_lv_nodes = \
+        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
+
+    if unlocked_lv_nodes:
+      raise errors.OpPrereqError("Missing lock for LV-check nodes: %s" %
+                                 utils.CommaJoin(unlocked_lv_nodes))
+    self.extra_lv_nodes = list(extra_lv_nodes)
 
   def _VerifyNode(self, ninfo, nresult):
     """Perform some basic validation on data returned from a node.
@@ -1663,12 +1878,6 @@ class LUClusterVerify(LogicalUnit):
                "instance not running on its primary node %s",
                node_current)
 
-    for node, n_img in node_image.items():
-      if node != node_current:
-        test = instance in n_img.instances
-        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
-                 "instance should not run on node %s", node)
-
     diskdata = [(nname, success, status, idx)
                 for (nname, disks) in diskstatus.items()
                 for idx, (success, status) in enumerate(disks)]
@@ -1708,18 +1917,6 @@ class LUClusterVerify(LogicalUnit):
         self._ErrorIf(test, self.ENODEORPHANLV, node,
                       "volume %s is unknown", volume)
 
-  def _VerifyOrphanInstances(self, instancelist, node_image):
-    """Verify the list of running instances.
-
-    This checks what instances are running but unknown to the cluster.
-
-    """
-    for node, n_img in node_image.items():
-      for o_inst in n_img.instances:
-        test = o_inst not in instancelist
-        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
-                      "instance %s on node %s should not exist", o_inst, node)
-
   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
     """Verify N+1 Memory Resilience.
 
@@ -1819,8 +2016,8 @@ class LUClusterVerify(LogicalUnit):
         # All or no nodes
         errorif(missing_file and missing_file != node_names,
                 cls.ECLUSTERFILECHECK, None,
-                "File %s is optional, but it must exist on all or no nodes (not"
-                " found on %s)",
+                "File %s is optional, but it must exist on all or no"
+                " nodes (not found on %s)",
                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
       else:
         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
@@ -2202,20 +2399,6 @@ class LUClusterVerify(LogicalUnit):
 
     return instdisk
 
-  def _VerifyHVP(self, hvp_data):
-    """Verifies locally the syntax of the hypervisor parameters.
-
-    """
-    for item, hv_name, hv_params in hvp_data:
-      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
-             (item, hv_name))
-      try:
-        hv_class = hypervisor.GetHypervisor(hv_name)
-        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
-        hv_class.CheckParameterSyntax(hv_params)
-      except errors.GenericError, err:
-        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
-
   def BuildHooksEnv(self):
     """Build hooks env.
 
@@ -2223,14 +2406,12 @@ class LUClusterVerify(LogicalUnit):
     the output be logged in the verify output and the verification to fail.
 
     """
-    cfg = self.cfg
-
     env = {
-      "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
+      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
       }
 
     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
-               for node in cfg.GetAllNodesInfo().values())
+               for node in self.my_node_info.values())
 
     return env
 
@@ -2238,36 +2419,31 @@ class LUClusterVerify(LogicalUnit):
     """Build hooks nodes.
 
     """
-    return ([], self.cfg.GetNodeList())
+    return ([], self.my_node_names)
 
   def Exec(self, feedback_fn):
-    """Verify integrity of cluster, performing various test on nodes.
+    """Verify integrity of the node group, performing various tests on nodes.
 
     """
     # This method has too many local variables. pylint: disable-msg=R0914
+
+    if not self.my_node_names:
+      # empty node group
+      feedback_fn("* Empty node group, skipping verification")
+      return True
+
     self.bad = False
     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
     verbose = self.op.verbose
     self._feedback_fn = feedback_fn
-    feedback_fn("* Verifying global settings")
-    for msg in self.cfg.VerifyConfig():
-      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
-
-    # Check the cluster certificates
-    for cert_filename in constants.ALL_CERT_FILES:
-      (errcode, msg) = _VerifyCertificate(cert_filename)
-      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
 
     vg_name = self.cfg.GetVGName()
     drbd_helper = self.cfg.GetDRBDHelper()
-    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
     cluster = self.cfg.GetClusterInfo()
-    nodeinfo_byname = self.cfg.GetAllNodesInfo()
-    nodelist = utils.NiceSort(nodeinfo_byname.keys())
-    nodeinfo = [nodeinfo_byname[nname] for nname in nodelist]
-    instanceinfo = self.cfg.GetAllInstancesInfo()
-    instancelist = utils.NiceSort(instanceinfo.keys())
     groupinfo = self.cfg.GetAllNodeGroupsInfo()
+    hypervisors = cluster.enabled_hypervisors
+    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
+
     i_non_redundant = [] # Non redundant instances
     i_non_a_balanced = [] # Non auto-balanced instances
     n_offline = 0 # Count of offline nodes
@@ -2283,37 +2459,32 @@ class LUClusterVerify(LogicalUnit):
     master_node = self.master_node = self.cfg.GetMasterNode()
     master_ip = self.cfg.GetMasterIP()
 
-    # Compute the set of hypervisor parameters
-    hvp_data = []
-    for hv_name in hypervisors:
-      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
-    for os_name, os_hvp in cluster.os_hvp.items():
-      for hv_name, hv_params in os_hvp.items():
-        if not hv_params:
-          continue
-        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
-        hvp_data.append(("os %s" % os_name, hv_name, full_params))
-    # TODO: collapse identical parameter values in a single one
-    for instance in instanceinfo.values():
-      if not instance.hvparams:
-        continue
-      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
-                       cluster.FillHV(instance)))
-    # and verify them locally
-    self._VerifyHVP(hvp_data)
+    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
+
+    # We will make nodes contact all nodes in their group, and one node from
+    # every other group.
+    # TODO: should it be a *random* node, different every time?
+    online_nodes = [node.name for node in node_data_list if not node.offline]
+    other_group_nodes = {}
+
+    for name in sorted(self.all_node_info):
+      node = self.all_node_info[name]
+      if (node.group not in other_group_nodes
+          and node.group != self.group_uuid
+          and not node.offline):
+        other_group_nodes[node.group] = node.name
 
-    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
     node_verify_param = {
       constants.NV_FILELIST:
         utils.UniqueSequence(filename
                              for files in filemap
                              for filename in files),
-      constants.NV_NODELIST: [node.name for node in nodeinfo
-                              if not node.offline],
+      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
       constants.NV_HYPERVISOR: hypervisors,
-      constants.NV_HVPARAMS: hvp_data,
-      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
-                                  node.secondary_ip) for node in nodeinfo
+      constants.NV_HVPARAMS:
+        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
+      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
+                                 for node in node_data_list
                                  if not node.offline],
       constants.NV_INSTANCELIST: hypervisors,
       constants.NV_VERSION: None,
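As the comment above says, the verification RPC now targets every online node in this group plus one online node from each other group. A tiny self-contained rehearsal of that selection (groups and node names invented):

# name -> (group uuid, offline flag); data invented for illustration.
nodes = {
  "n1": ("group-a", False),
  "n2": ("group-a", True),
  "n3": ("group-b", False),
  "n4": ("group-c", False),
  }
this_group = "group-a"
online_nodes = [n for n, (g, off) in sorted(nodes.items())
                if g == this_group and not off]
other_group_nodes = {}
for name in sorted(nodes):
  group, offline = nodes[name]
  if group not in other_group_nodes and group != this_group and not offline:
    other_group_nodes[group] = name
print(sorted(online_nodes + other_group_nodes.values()))  # ['n1', 'n3', 'n4']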
@@ -2340,7 +2511,7 @@ class LUClusterVerify(LogicalUnit):
     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
       bridges.add(default_nicpp[constants.NIC_LINK])
-    for instance in instanceinfo.values():
+    for instance in self.my_inst_info.values():
       for nic in instance.nics:
         full_nic = cluster.SimpleFillNIC(nic.nicparams)
         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
@@ -2353,11 +2524,11 @@ class LUClusterVerify(LogicalUnit):
     node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                  name=node.name,
                                                  vm_capable=node.vm_capable))
-                      for node in nodeinfo)
+                      for node in node_data_list)
 
     # Gather OOB paths
     oob_paths = []
-    for node in nodeinfo:
+    for node in self.all_node_info.values():
       path = _SupportsOob(self.cfg, node)
       if path and path not in oob_paths:
         oob_paths.append(path)
@@ -2365,14 +2536,13 @@ class LUClusterVerify(LogicalUnit):
     if oob_paths:
       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
 
-    for instance in instancelist:
-      inst_config = instanceinfo[instance]
+    for instance in self.my_inst_names:
+      inst_config = self.my_inst_info[instance]
 
       for nname in inst_config.all_nodes:
         if nname not in node_image:
-          # ghost node
           gnode = self.NodeImage(name=nname)
-          gnode.ghost = True
+          gnode.ghost = (nname not in self.all_node_info)
           node_image[nname] = gnode
 
       inst_config.MapLVsByNode(node_vol_should)
@@ -2395,23 +2565,60 @@ class LUClusterVerify(LogicalUnit):
     # time before and after executing the request, we can at least have a time
     # window.
     nvinfo_starttime = time.time()
-    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
+    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
+                                           node_verify_param,
                                            self.cfg.GetClusterName())
     nvinfo_endtime = time.time()
 
+    if self.extra_lv_nodes and vg_name is not None:
+      extra_lv_nvinfo = \
+          self.rpc.call_node_verify(self.extra_lv_nodes,
+                                    {constants.NV_LVLIST: vg_name},
+                                    self.cfg.GetClusterName())
+    else:
+      extra_lv_nvinfo = {}
+
     all_drbd_map = self.cfg.ComputeDRBDMap()
 
-    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
-    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
+    feedback_fn("* Gathering disk information (%s nodes)" %
+                len(self.my_node_names))
+    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
+                                     self.my_inst_info)
 
     feedback_fn("* Verifying configuration file consistency")
-    self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
+
+    # If not all nodes are being checked, we need to make sure the master node
+    # and a non-checked vm_capable node are in the list.
+    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
+    if absent_nodes:
+      vf_nvinfo = all_nvinfo.copy()
+      vf_node_info = list(self.my_node_info.values())
+      additional_nodes = []
+      if master_node not in self.my_node_info:
+        additional_nodes.append(master_node)
+        vf_node_info.append(self.all_node_info[master_node])
+      # Add the first vm_capable node we find which is not included
+      for node in absent_nodes:
+        nodeinfo = self.all_node_info[node]
+        if nodeinfo.vm_capable and not nodeinfo.offline:
+          additional_nodes.append(node)
+          vf_node_info.append(self.all_node_info[node])
+          break
+      key = constants.NV_FILELIST
+      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
+                                                 {key: node_verify_param[key]},
+                                                 self.cfg.GetClusterName()))
+    else:
+      vf_nvinfo = all_nvinfo
+      vf_node_info = self.my_node_info.values()
+
+    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
 
     feedback_fn("* Verifying node status")
 
     refos_img = None
 
-    for node_i in nodeinfo:
+    for node_i in node_data_list:
       node = node_i.name
       nimg = node_image[node]
 
@@ -2448,24 +2655,41 @@ class LUClusterVerify(LogicalUnit):
 
       if nimg.vm_capable:
         self._VerifyNodeLVM(node_i, nresult, vg_name)
-        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
+        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                              all_drbd_map)
 
         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
         self._UpdateNodeInstances(node_i, nresult, nimg)
         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
         self._UpdateNodeOS(node_i, nresult, nimg)
+
         if not nimg.os_fail:
           if refos_img is None:
             refos_img = nimg
           self._VerifyNodeOS(node_i, nimg, refos_img)
         self._VerifyNodeBridges(node_i, nresult, bridges)
 
+        # Check whether all running instances are primary for the node. (This
+        # can no longer be done from _VerifyInstance below, since some of the
+        # wrong instances could be from other node groups.)
+        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
+
+        for inst in non_primary_inst:
+          test = inst in self.all_inst_info
+          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
+                   "instance should not run on node %s", node_i.name)
+          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
+                   "node is running unknown instance %s", inst)
+
+    for node, result in extra_lv_nvinfo.items():
+      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
+                              node_image[node], vg_name)
+
     feedback_fn("* Verifying instance status")
-    for instance in instancelist:
+    for instance in self.my_inst_names:
       if verbose:
         feedback_fn("* Verifying instance %s" % instance)
-      inst_config = instanceinfo[instance]
+      inst_config = self.my_inst_info[instance]
       self._VerifyInstance(instance, inst_config, node_image,
                            instdisk[instance])
       inst_nodes_offline = []
@@ -2500,7 +2724,7 @@ class LUClusterVerify(LogicalUnit):
         instance_groups = {}
 
         for node in instance_nodes:
-          instance_groups.setdefault(nodeinfo_byname[node].group,
+          instance_groups.setdefault(self.all_node_info[node].group,
                                      []).append(node)
 
         pretty_list = [
@@ -2539,14 +2763,22 @@ class LUClusterVerify(LogicalUnit):
 
     feedback_fn("* Verifying orphan volumes")
     reserved = utils.FieldSet(*cluster.reserved_lvs)
-    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
 
-    feedback_fn("* Verifying orphan instances")
-    self._VerifyOrphanInstances(instancelist, node_image)
+    # We will get spurious "unknown volume" warnings if any node of this group
+    # is secondary for an instance whose primary is in another group. To avoid
+    # them, we find these instances and add their volumes to node_vol_should.
+    for inst in self.all_inst_info.values():
+      for secondary in inst.secondary_nodes:
+        if (secondary in self.my_node_info
+            and inst.name not in self.my_inst_info):
+          inst.MapLVsByNode(node_vol_should)
+          break
+
+    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
 
     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
       feedback_fn("* Verifying N+1 Memory redundancy")
-      self._VerifyNPlusOneMemory(node_image, instanceinfo)
+      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
 
     feedback_fn("* Other Notes")
     if i_non_redundant:
@@ -2580,9 +2812,12 @@ class LUClusterVerify(LogicalUnit):
         and hook results
 
     """
-    # We only really run POST phase hooks, and are only interested in
-    # their results
-    if phase == constants.HOOKS_PHASE_POST:
+    # We only really run POST phase hooks, only for non-empty groups,
+    # and are only interested in their results
+    if not self.my_node_names:
+      # empty node group
+      pass
+    elif phase == constants.HOOKS_PHASE_POST:
       # Used to change hooks' output to proper indentation
       feedback_fn("* Hooks Results")
       assert hooks_results, "invalid result from hooks"
@@ -2608,7 +2843,7 @@ class LUClusterVerify(LogicalUnit):
             feedback_fn("%s" % output)
             lu_result = 0
 
-      return lu_result
+    return lu_result
 
 
 class LUClusterVerifyDisks(NoHooksLU):
@@ -5458,7 +5693,8 @@ class LUInstanceStartup(LogicalUnit):
     instance = self.instance
     force = self.op.force
 
-    self.cfg.MarkInstanceUp(instance.name)
+    if not self.op.no_remember:
+      self.cfg.MarkInstanceUp(instance.name)
 
     if self.primary_offline:
       assert self.op.ignore_offline_nodes
@@ -5469,7 +5705,8 @@ class LUInstanceStartup(LogicalUnit):
       _StartInstanceDisks(self, instance, force)
 
       result = self.rpc.call_instance_start(node_current, instance,
-                                            self.op.hvparams, self.op.beparams)
+                                            self.op.hvparams, self.op.beparams,
+                                            self.op.startup_paused)
       msg = result.fail_msg
       if msg:
         _ShutdownInstanceDisks(self, instance)
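This is where the commit title lands: every call_instance_start site in the patch gains a fifth argument, so that gnt-instance start --pause can start the instance with its execution paused. A minimal sketch of the widened signature, purely illustrative and not Ganeti's actual RPC layer:

def call_instance_start(node, instance, hvparams, beparams, startup_paused):
  """Start the instance; when startup_paused is True, leave its CPUs paused."""
  action = "start paused" if startup_paused else "start"
  return "%s %s on %s" % (action, instance, node)

print(call_instance_start("node1", "web1.example.com", None, None, True))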
@@ -5559,7 +5796,8 @@ class LUInstanceReboot(LogicalUnit):
         self.LogInfo("Instance %s was already stopped, starting now",
                      instance.name)
       _StartInstanceDisks(self, instance, ignore_secondaries)
-      result = self.rpc.call_instance_start(node_current, instance, None, None)
+      result = self.rpc.call_instance_start(node_current, instance,
+                                            None, None, False)
       msg = result.fail_msg
       if msg:
         _ShutdownInstanceDisks(self, instance)
@@ -5623,7 +5861,8 @@ class LUInstanceShutdown(LogicalUnit):
     node_current = instance.primary_node
     timeout = self.op.timeout
 
-    self.cfg.MarkInstanceDown(instance.name)
+    if not self.op.no_remember:
+      self.cfg.MarkInstanceDown(instance.name)
 
     if self.primary_offline:
       assert self.op.ignore_offline_nodes
@@ -5928,7 +6167,7 @@ class LUInstanceRename(LogicalUnit):
     old_name = inst.name
 
     rename_file_storage = False
-    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
+    if (inst.disk_template in constants.DTS_FILEBASED and
         self.op.new_name != inst.name):
       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
       rename_file_storage = True
@@ -6409,7 +6648,8 @@ class LUInstanceMove(LogicalUnit):
         _ShutdownInstanceDisks(self, instance)
         raise errors.OpExecError("Can't activate the instance's disks")
 
-      result = self.rpc.call_instance_start(target_node, instance, None, None)
+      result = self.rpc.call_instance_start(target_node, instance,
+                                            None, None, False)
       msg = result.fail_msg
       if msg:
         _ShutdownInstanceDisks(self, instance)
@@ -6426,45 +6666,15 @@ class LUNodeMigrate(LogicalUnit):
   REQ_BGL = False
 
   def CheckArguments(self):
-    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
+    pass
 
   def ExpandNames(self):
     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
 
-    self.needed_locks = {}
-
-    # Create tasklets for migrating instances for all instances on this node
-    names = []
-    tasklets = []
-
-    self.lock_all_nodes = False
-
-    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
-      logging.debug("Migrating instance %s", inst.name)
-      names.append(inst.name)
-
-      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False))
-
-      if inst.disk_template in constants.DTS_EXT_MIRROR:
-        # We need to lock all nodes, as the iallocator will choose the
-        # destination nodes afterwards
-        self.lock_all_nodes = True
-
-    self.tasklets = tasklets
-
-    # Declare node locks
-    if self.lock_all_nodes:
-      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
-    else:
-      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
-      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
-
-    # Declare instance locks
-    self.needed_locks[locking.LEVEL_INSTANCE] = names
-
-  def DeclareLocks(self, level):
-    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
-      self._LockInstancesNodes()
+    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+    self.needed_locks = {
+      locking.LEVEL_NODE: [self.op.node_name],
+      }
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -6483,6 +6693,30 @@ class LUNodeMigrate(LogicalUnit):
     nl = [self.cfg.GetMasterNode()]
     return (nl, nl)
 
+  def CheckPrereq(self):
+    pass
+
+  def Exec(self, feedback_fn):
+    # Prepare jobs for migration instances
+    jobs = [
+      [opcodes.OpInstanceMigrate(instance_name=inst.name,
+                                 mode=self.op.mode,
+                                 live=self.op.live,
+                                 iallocator=self.op.iallocator,
+                                 target_node=self.op.target_node)]
+      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
+      ]
+
+    # TODO: Run iallocator in this opcode and pass correct placement options to
+    # OpInstanceMigrate. Since other jobs can modify the cluster between
+    # running the iallocator and the actual migration, a good consistency model
+    # will have to be found.
+
+    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
+            frozenset([self.op.node_name]))
+
+    return ResultWithJobs(jobs)
+
 
 class TLMigrateInstance(Tasklet):
   """Tasklet class for instance migration.
@@ -7367,7 +7601,7 @@ def _CreateDisks(lu, instance, to_skip=None, target_node=None):
     pnode = target_node
     all_nodes = [pnode]
 
-  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
+  if instance.disk_template in constants.DTS_FILEBASED:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
 
@@ -7640,9 +7874,10 @@ class LUInstanceCreate(LogicalUnit):
       raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                  self.op.file_driver, errors.ECODE_INVAL)
 
-    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
-      raise errors.OpPrereqError("File storage directory path not absolute",
-                                 errors.ECODE_INVAL)
+    if self.op.disk_template == constants.DT_FILE:
+      opcodes.RequireFileStorage()
+    elif self.op.disk_template == constants.DT_SHARED_FILE:
+      opcodes.RequireSharedFileStorage()
 
     ### Node/iallocator related checks
     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
@@ -7784,10 +8019,10 @@ class LUInstanceCreate(LogicalUnit):
                      mode=constants.IALLOCATOR_MODE_ALLOC,
                      name=self.op.instance_name,
                      disk_template=self.op.disk_template,
-                     tags=[],
+                     tags=self.op.tags,
                      os=self.op.os_type,
                      vcpus=self.be_full[constants.BE_VCPUS],
-                     mem_size=self.be_full[constants.BE_MEMORY],
+                     memory=self.be_full[constants.BE_MEMORY],
                      disks=self.disks,
                      nics=nics,
                      hypervisor=self.op.hypervisor,
@@ -7841,6 +8076,7 @@ class LUInstanceCreate(LogicalUnit):
       bep=self.be_full,
       hvp=self.hv_full,
       hypervisor_name=self.op.hypervisor,
+      tags=self.op.tags,
     ))
 
     return env
@@ -7942,9 +8178,13 @@ class LUInstanceCreate(LogicalUnit):
         nics.append(ndict)
       self.op.nics = nics
 
+    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
+      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
+
     if (self.op.hypervisor is None and
         einfo.has_option(constants.INISECT_INS, "hypervisor")):
       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
+
     if einfo.has_section(constants.INISECT_HYP):
       # use the export parameters but do not override the ones
       # specified by the user
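On import, the tags are read back from the export's info file (the hunk above). A roughly equivalent standalone parse with the Python 2 stdlib, assuming constants.INISECT_INS maps to the "instance" section and with invented tag values:

import ConfigParser
import StringIO

einfo = ConfigParser.SafeConfigParser()
einfo.readfp(StringIO.StringIO("[instance]\ntags = web production\n"))
if einfo.has_option("instance", "tags"):
  print(einfo.get("instance", "tags").split())  # ['web', 'production']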
@@ -7996,10 +8236,40 @@ class LUInstanceCreate(LogicalUnit):
       if name in os_defs and os_defs[name] == self.op.osparams[name]:
         del self.op.osparams[name]
 
+  def _CalculateFileStorageDir(self):
+    """Calculate final instance file storage dir.
+
+    """
+    # file storage dir calculation/check
+    self.instance_file_storage_dir = None
+    if self.op.disk_template in constants.DTS_FILEBASED:
+      # build the full file storage dir path
+      joinargs = []
+
+      if self.op.disk_template == constants.DT_SHARED_FILE:
+        get_fsd_fn = self.cfg.GetSharedFileStorageDir
+      else:
+        get_fsd_fn = self.cfg.GetFileStorageDir
+
+      cfg_storagedir = get_fsd_fn()
+      if not cfg_storagedir:
+        raise errors.OpPrereqError("Cluster file storage dir not defined")
+      joinargs.append(cfg_storagedir)
+
+      if self.op.file_storage_dir is not None:
+        joinargs.append(self.op.file_storage_dir)
+
+      joinargs.append(self.op.instance_name)
+
+      # pylint: disable-msg=W0142
+      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
+
   def CheckPrereq(self):
     """Check prerequisites.
 
     """
+    self._CalculateFileStorageDir()
+
     if self.op.mode == constants.INSTANCE_IMPORT:
       export_info = self._ReadExportInfo()
       self._ReadExportParams(export_info)
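The _CalculateFileStorageDir helper above assembles the final directory from the cluster-wide storage dir, the optional --file-storage-dir component, and the instance name. For example (paths invented; utils.PathJoin is assumed to behave like os.path.join plus safety checks):

import os.path

cfg_storagedir = "/srv/ganeti/file-storage"   # cluster default (invented)
op_file_storage_dir = "databases"             # optional user-supplied component
instance_name = "db1.example.com"

joinargs = [cfg_storagedir]
if op_file_storage_dir is not None:
  joinargs.append(op_file_storage_dir)
joinargs.append(instance_name)
print(os.path.join(*joinargs))
# -> /srv/ganeti/file-storage/databases/db1.example.com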
@@ -8020,6 +8290,10 @@ class LUInstanceCreate(LogicalUnit):
                                   ",".join(enabled_hvs)),
                                  errors.ECODE_STATE)
 
+    # Check tag validity
+    for tag in self.op.tags:
+      objects.TaggableObject.ValidateTag(tag)
+
     # check hypervisor parameter syntax (locally)
     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
@@ -8327,30 +8601,12 @@ class LUInstanceCreate(LogicalUnit):
     else:
       network_port = None
 
-    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
-      # this is needed because os.path.join does not accept None arguments
-      if self.op.file_storage_dir is None:
-        string_file_storage_dir = ""
-      else:
-        string_file_storage_dir = self.op.file_storage_dir
-
-      # build the full file storage dir path
-      if self.op.disk_template == constants.DT_SHARED_FILE:
-        get_fsd_fn = self.cfg.GetSharedFileStorageDir
-      else:
-        get_fsd_fn = self.cfg.GetFileStorageDir
-
-      file_storage_dir = utils.PathJoin(get_fsd_fn(),
-                                        string_file_storage_dir, instance)
-    else:
-      file_storage_dir = ""
-
     disks = _GenerateDiskTemplate(self,
                                   self.op.disk_template,
                                   instance, pnode_name,
                                   self.secondaries,
                                   self.disks,
-                                  file_storage_dir,
+                                  self.instance_file_storage_dir,
                                   self.op.file_driver,
                                   0,
                                   feedback_fn)
@@ -8367,6 +8623,10 @@ class LUInstanceCreate(LogicalUnit):
                             osparams=self.op.osparams,
                             )
 
+    if self.op.tags:
+      for tag in self.op.tags:
+        iobj.AddTag(tag)
+
     if self.adopt_disks:
       if self.op.disk_template == constants.DT_PLAIN:
         # rename LVs to the newly-generated names; we need to construct
@@ -8514,7 +8774,8 @@ class LUInstanceCreate(LogicalUnit):
       self.cfg.Update(iobj, feedback_fn)
       logging.info("Starting instance %s on node %s", instance, pnode_name)
       feedback_fn("* starting instance...")
-      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
+      result = self.rpc.call_instance_start(pnode_name, iobj,
+                                            None, None, False)
       result.Raise("Could not start instance")
 
     return list(iobj.all_nodes)
@@ -9497,8 +9758,8 @@ class LURepairNodeStorage(NoHooksLU):
                  (self.op.name, self.op.node_name))
 
 
-class LUNodeEvacStrategy(NoHooksLU):
-  """Computes the node evacuation strategy.
+class LUNodeEvacuate(NoHooksLU):
+  """Evacuates instances off a list of nodes.
 
   """
   REQ_BGL = False
@@ -9507,38 +9768,166 @@ class LUNodeEvacStrategy(NoHooksLU):
     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
 
   def ExpandNames(self):
-    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
-    self.needed_locks = locks = {}
+    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+
+    if self.op.remote_node is not None:
+      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
+      assert self.op.remote_node
+
+      if self.op.remote_node == self.op.node_name:
+        raise errors.OpPrereqError("Can not use evacuated node as a new"
+                                   " secondary node", errors.ECODE_INVAL)
+
+      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
+        raise errors.OpPrereqError("Without the use of an iallocator only"
+                                   " secondary instances can be evacuated",
+                                   errors.ECODE_INVAL)
+
+    # Declare locks
+    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+    self.needed_locks = {
+      locking.LEVEL_INSTANCE: [],
+      locking.LEVEL_NODEGROUP: [],
+      locking.LEVEL_NODE: [],
+      }
+
     if self.op.remote_node is None:
-      locks[locking.LEVEL_NODE] = locking.ALL_SET
+      # Iallocator will choose any node(s) in the same group
+      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
     else:
-      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
-      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
+      group_nodes = frozenset([self.op.remote_node])
+
+    # Determine nodes to be locked
+    self.lock_nodes = set([self.op.node_name]) | group_nodes
+
+  def _DetermineInstances(self):
+    """Builds list of instances to operate on.
+
+    """
+    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
+
+    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
+      # Primary instances only
+      inst_fn = _GetNodePrimaryInstances
+      assert self.op.remote_node is None, \
+        "Evacuating primary instances requires iallocator"
+    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
+      # Secondary instances only
+      inst_fn = _GetNodeSecondaryInstances
+    else:
+      # All instances
+      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
+      inst_fn = _GetNodeInstances
+
+    return inst_fn(self.cfg, self.op.node_name)
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_INSTANCE:
+      # Lock instances optimistically, needs verification once node and group
+      # locks have been acquired
+      self.needed_locks[locking.LEVEL_INSTANCE] = \
+        set(i.name for i in self._DetermineInstances())
+
+    elif level == locking.LEVEL_NODEGROUP:
+      # Lock node groups optimistically, needs verification once nodes have
+      # been acquired
+      self.needed_locks[locking.LEVEL_NODEGROUP] = \
+        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
+
+    elif level == locking.LEVEL_NODE:
+      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
+
+  def CheckPrereq(self):
+    # Verify locks
+    owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
+    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
+    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
+
+    assert owned_nodes == self.lock_nodes
+
+    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
+    if owned_groups != wanted_groups:
+      raise errors.OpExecError("Node groups changed since locks were acquired,"
+                               " current groups are '%s', used to be '%s'" %
+                               (utils.CommaJoin(wanted_groups),
+                                utils.CommaJoin(owned_groups)))
+
+    # Determine affected instances
+    self.instances = self._DetermineInstances()
+    self.instance_names = [i.name for i in self.instances]
+
+    if set(self.instance_names) != owned_instances:
+      raise errors.OpExecError("Instances on node '%s' changed since locks"
+                               " were acquired, current instances are '%s',"
+                               " used to be '%s'" %
+                               (self.op.node_name,
+                                utils.CommaJoin(self.instance_names),
+                                utils.CommaJoin(owned_instances)))
+
+    if self.instance_names:
+      self.LogInfo("Evacuating instances from node '%s': %s",
+                   self.op.node_name,
+                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
+    else:
+      self.LogInfo("No instances to evacuate from node '%s'",
+                   self.op.node_name)
 
-  def Exec(self, feedback_fn):
     if self.op.remote_node is not None:
-      instances = []
-      for node in self.op.nodes:
-        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
-      result = []
-      for i in instances:
+      for i in self.instances:
         if i.primary_node == self.op.remote_node:
           raise errors.OpPrereqError("Node %s is the primary node of"
                                      " instance %s, cannot use it as"
                                      " secondary" %
                                      (self.op.remote_node, i.name),
                                      errors.ECODE_INVAL)
-        result.append([i.name, self.op.remote_node])
-    else:
-      ial = IAllocator(self.cfg, self.rpc,
-                       mode=constants.IALLOCATOR_MODE_MEVAC,
-                       evac_nodes=self.op.nodes)
-      ial.Run(self.op.iallocator, validate=True)
+
+  def Exec(self, feedback_fn):
+    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
+
+    if not self.instance_names:
+      # No instances to evacuate
+      jobs = []
+
+    elif self.op.iallocator is not None:
+      # TODO: Implement relocation to other group
+      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
+                       evac_mode=self.op.mode,
+                       instances=list(self.instance_names))
+
+      ial.Run(self.op.iallocator)
+
       if not ial.success:
-        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
-                                 errors.ECODE_NORES)
-      result = ial.result
-    return result
+        raise errors.OpPrereqError("Can't compute node evacuation using"
+                                   " iallocator '%s': %s" %
+                                   (self.op.iallocator, ial.info),
+                                   errors.ECODE_NORES)
+
+      jobs = [[opcodes.OpCode.LoadOpCode(state) for state in jobset]
+              for jobset in ial.result]
+
+      # Set "early_release" flag on opcodes where available
+      early_release = self.op.early_release
+      for op in itertools.chain(*jobs): # pylint: disable-msg=W0142
+        try:
+          op.early_release = early_release
+        except AttributeError:
+          assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
+
+    elif self.op.remote_node is not None:
+      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
+      jobs = [
+        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
+                                        remote_node=self.op.remote_node,
+                                        disks=[],
+                                        mode=constants.REPLACE_DISK_CHG,
+                                        early_release=self.op.early_release)]
+        for instance_name in self.instance_names
+        ]
+
+    else:
+      raise errors.ProgrammerError("No iallocator or remote node")
+
+    return ResultWithJobs(jobs)
 
 
 class LUInstanceGrowDisk(LogicalUnit):
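In the hunk above, LUNodeEvacuate no longer hands a placement plan back to the caller; it returns jobs through ResultWithJobs. The structure is a list of jobs, each job a list of opcodes run in order; for the fixed-secondary case it would look roughly like this (instance and node names invented):

from ganeti import constants, opcodes

jobs = [
  [opcodes.OpInstanceReplaceDisks(instance_name="web1", remote_node="node9",
                                  disks=[], mode=constants.REPLACE_DISK_CHG,
                                  early_release=False)],
  [opcodes.OpInstanceReplaceDisks(instance_name="db1", remote_node="node9",
                                  disks=[], mode=constants.REPLACE_DISK_CHG,
                                  early_release=False)],
  ]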
@@ -10083,6 +10472,7 @@ class LUInstanceSetParams(LogicalUnit):
       self.be_inst = i_bedict # the new dict (without defaults)
     else:
       self.be_new = self.be_inst = {}
+    be_old = cluster.FillBE(instance)
 
     # osparams processing
     if self.op.osparams:
@@ -10094,7 +10484,8 @@ class LUInstanceSetParams(LogicalUnit):
 
     self.warn = []
 
-    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
+    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
+        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
       mem_check_list = [pnode]
       if be_new[constants.BE_AUTO_BALANCE]:
         # either we changed auto_balance to yes or it was from before
@@ -10135,16 +10526,17 @@ class LUInstanceSetParams(LogicalUnit):
         for node, nres in nodeinfo.items():
           if node not in instance.secondary_nodes:
             continue
-          msg = nres.fail_msg
-          if msg:
-            self.warn.append("Can't get info from secondary node %s: %s" %
-                             (node, msg))
-          elif not isinstance(nres.payload.get('memory_free', None), int):
-            self.warn.append("Secondary node %s didn't return free"
-                             " memory information" % node)
+          nres.Raise("Can't get info from secondary node %s" % node,
+                     prereq=True, ecode=errors.ECODE_STATE)
+          if not isinstance(nres.payload.get('memory_free', None), int):
+            raise errors.OpPrereqError("Secondary node %s didn't return free"
+                                       " memory information" % node,
+                                       errors.ECODE_STATE)
           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
-            self.warn.append("Not enough memory to failover instance to"
-                             " secondary node %s" % node)
+            raise errors.OpPrereqError("This change will prevent the instance"
+                                       " from failover to its secondary node"
+                                       " %s, due to not enough memory" % node,
+                                       errors.ECODE_STATE)
 
     # NIC processing
     self.nic_pnew = {}
@@ -10802,7 +11194,8 @@ class LUBackupExport(LogicalUnit):
             not self.op.remove_instance):
           assert not activate_disks
           feedback_fn("Starting instance %s" % instance.name)
-          result = self.rpc.call_instance_start(src_node, instance, None, None)
+          result = self.rpc.call_instance_start(src_node, instance,
+                                                None, None, False)
           msg = result.fail_msg
           if msg:
             feedback_fn("Failed to start instance: %s" % msg)
@@ -11783,26 +12176,28 @@ class IAllocator(object):
     self.in_text = self.out_text = self.in_data = self.out_data = None
     # init all input fields so that pylint is happy
     self.mode = mode
-    self.mem_size = self.disks = self.disk_template = None
+    self.memory = self.disks = self.disk_template = None
     self.os = self.tags = self.nics = self.vcpus = None
     self.hypervisor = None
     self.relocate_from = None
     self.name = None
     self.evac_nodes = None
     self.instances = None
-    self.reloc_mode = None
-    self.target_groups = None
+    self.evac_mode = None
+    self.target_groups = []
     # computed fields
     self.required_nodes = None
     # init result fields
     self.success = self.info = self.result = None
 
     try:
-      (fn, keyset) = self._MODE_DATA[self.mode]
+      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
     except KeyError:
       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                    " IAllocator" % self.mode)
 
+    keyset = [n for (n, _) in keydata]
+
     for key in kwargs:
       if key not in keyset:
         raise errors.ProgrammerError("Invalid input parameter '%s' to"
@@ -11813,7 +12208,7 @@ class IAllocator(object):
       if key not in kwargs:
         raise errors.ProgrammerError("Missing input parameter '%s' to"
                                      " IAllocator" % key)
-    self._BuildInputData(compat.partial(fn, self))
+    self._BuildInputData(compat.partial(fn, self), keydata)
 
   def _ComputeClusterData(self):
     """Compute the generic allocator input data.
@@ -11842,8 +12237,7 @@ class IAllocator(object):
       hypervisor_name = self.hypervisor
     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
-    elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
-                       constants.IALLOCATOR_MODE_MRELOC):
+    else:
       hypervisor_name = cluster_info.enabled_hypervisors[0]
 
     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
@@ -12023,11 +12417,12 @@ class IAllocator(object):
       "tags": self.tags,
       "os": self.os,
       "vcpus": self.vcpus,
-      "memory": self.mem_size,
+      "memory": self.memory,
       "disks": self.disks,
       "disk_space_total": disk_space,
       "nics": self.nics,
       "required_nodes": self.required_nodes,
+      "hypervisor": self.hypervisor,
       }
 
     return request
@@ -12077,17 +12472,25 @@ class IAllocator(object):
       }
     return request
 
-  def _AddMultiRelocate(self):
-    """Get data for multi-relocate requests.
+  def _AddNodeEvacuate(self):
+    """Get data for node-evacuate requests.
+
+    """
+    return {
+      "instances": self.instances,
+      "evac_mode": self.evac_mode,
+      }
+
+  def _AddChangeGroup(self):
+    """Get data for node-evacuate requests.
 
     """
     return {
       "instances": self.instances,
-      "reloc_mode": self.reloc_mode,
       "target_groups": self.target_groups,
       }
 
-  def _BuildInputData(self, fn):
+  def _BuildInputData(self, fn, keydata):
     """Build input data structures.
 
     """
@@ -12095,21 +12498,58 @@ class IAllocator(object):
 
     request = fn()
     request["type"] = self.mode
+    for keyname, keytype in keydata:
+      if keyname not in request:
+        raise errors.ProgrammerError("Request parameter %s is missing" %
+                                     keyname)
+      val = request[keyname]
+      if not keytype(val):
+        raise errors.ProgrammerError("Request parameter %s doesn't pass"
+                                     " validation, value %s, expected"
+                                     " type %s" % (keyname, val, keytype))
     self.in_data["request"] = request
 
     self.in_text = serializer.Dump(self.in_data)
 
+  _STRING_LIST = ht.TListOf(ht.TString)
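+  # expected result for node-evacuate and change-group requests: a list of
+  # job sets in which every opcode is a failover, migrate or replace-disks
+  # operation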
+  _JOBSET_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
+     # pylint: disable-msg=E1101
+     # Class '...' has no 'OP_ID' member
+     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
+                          opcodes.OpInstanceMigrate.OP_ID,
+                          opcodes.OpInstanceReplaceDisks.OP_ID])
+     })))
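+  # for every mode: the function building the request, the (field, type
+  # check) pairs describing its parameters, and the type check applied to
+  # the iallocator result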
   _MODE_DATA = {
     constants.IALLOCATOR_MODE_ALLOC:
       (_AddNewInstance,
-       ["name", "mem_size", "disks", "disk_template", "os", "tags", "nics",
-        "vcpus", "hypervisor"]),
+       [
+        ("name", ht.TString),
+        ("memory", ht.TInt),
+        ("disks", ht.TListOf(ht.TDict)),
+        ("disk_template", ht.TString),
+        ("os", ht.TString),
+        ("tags", _STRING_LIST),
+        ("nics", ht.TListOf(ht.TDict)),
+        ("vcpus", ht.TInt),
+        ("hypervisor", ht.TString),
+        ], ht.TList),
     constants.IALLOCATOR_MODE_RELOC:
-      (_AddRelocateInstance, ["name", "relocate_from"]),
+      (_AddRelocateInstance,
+       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
+       ht.TList),
     constants.IALLOCATOR_MODE_MEVAC:
-      (_AddEvacuateNodes, ["evac_nodes"]),
-    constants.IALLOCATOR_MODE_MRELOC:
-      (_AddMultiRelocate, ["instances", "reloc_mode", "target_groups"]),
+      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
+       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
+    constants.IALLOCATOR_MODE_NODE_EVAC:
+      (_AddNodeEvacuate, [
+        ("instances", _STRING_LIST),
+        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
+        ], _JOBSET_LIST),
+    constants.IALLOCATOR_MODE_CHG_GROUP:
+      (_AddChangeGroup, [
+        ("instances", _STRING_LIST),
+        ("target_groups", _STRING_LIST),
+        ], _JOBSET_LIST),
     }
 
   def Run(self, name, validate=True, call_fn=None):
@@ -12152,28 +12592,48 @@ class IAllocator(object):
                                  " missing key '%s'" % key)
       setattr(self, key, rdict[key])
 
-    if not isinstance(rdict["result"], list):
-      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
-                               " is not a list")
-
-    if self.mode == constants.IALLOCATOR_MODE_RELOC:
-      assert self.relocate_from is not None
-      assert self.required_nodes == 1
+    if not self._result_check(self.result):
+      raise errors.OpExecError("Iallocator returned invalid result,"
+                               " expected %s, got %s" %
+                               (self._result_check, self.result))
 
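+    # relocation and multi-evacuation results must keep instances within the
+    # node groups of the nodes they were originally placed on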
+    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
+                     constants.IALLOCATOR_MODE_MEVAC):
       node2group = dict((name, ndata["group"])
                         for (name, ndata) in self.in_data["nodes"].items())
 
       fn = compat.partial(self._NodesToGroups, node2group,
                           self.in_data["nodegroups"])
 
-      request_groups = fn(self.relocate_from)
-      result_groups = fn(rdict["result"])
+      if self.mode == constants.IALLOCATOR_MODE_RELOC:
+        assert self.relocate_from is not None
+        assert self.required_nodes == 1
+
+        request_groups = fn(self.relocate_from)
+        result_groups = fn(rdict["result"])
+
+        if result_groups != request_groups:
+          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
+                                   " differ from original groups (%s)" %
+                                   (utils.CommaJoin(result_groups),
+                                    utils.CommaJoin(request_groups)))
+      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
+        request_groups = fn(self.evac_nodes)
+        for (instance_name, secnode) in self.result:
+          result_groups = fn([secnode])
+          if result_groups != request_groups:
+            raise errors.OpExecError("Iallocator returned new secondary node"
+                                     " '%s' (group '%s') for instance '%s'"
+                                     " which is not in original group '%s'" %
+                                     (secnode, utils.CommaJoin(result_groups),
+                                      instance_name,
+                                      utils.CommaJoin(request_groups)))
+      else:
+        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
 
-      if result_groups != request_groups:
-        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
-                                 " differ from original groups (%s)" %
-                                 (utils.CommaJoin(result_groups),
-                                  utils.CommaJoin(request_groups)))
+    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
+      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
 
     self.out_data = rdict
 
@@ -12224,7 +12684,7 @@ class LUTestAllocator(NoHooksLU):
 
     """
     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
-      for attr in ["mem_size", "disks", "disk_template",
+      for attr in ["memory", "disks", "disk_template",
                    "os", "tags", "nics", "vcpus"]:
         if not hasattr(self.op, attr):
           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
@@ -12241,10 +12701,10 @@ class LUTestAllocator(NoHooksLU):
                                    errors.ECODE_INVAL)
       for row in self.op.disks:
         if (not isinstance(row, dict) or
-            "size" not in row or
-            not isinstance(row["size"], int) or
-            "mode" not in row or
-            row["mode"] not in ['r', 'w']):
+            constants.IDISK_SIZE not in row or
+            not isinstance(row[constants.IDISK_SIZE], int) or
+            constants.IDISK_MODE not in row or
+            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
           raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                      " parameter", errors.ECODE_INVAL)
       if self.op.hypervisor is None:
@@ -12257,6 +12717,11 @@ class LUTestAllocator(NoHooksLU):
       if not hasattr(self.op, "evac_nodes"):
         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                    " opcode input", errors.ECODE_INVAL)
+    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
+                          constants.IALLOCATOR_MODE_NODE_EVAC):
+      if not self.op.instances:
+        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
+      self.op.instances = _GetWantedInstances(self, self.op.instances)
     else:
       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                  self.op.mode, errors.ECODE_INVAL)
@@ -12277,7 +12742,7 @@ class LUTestAllocator(NoHooksLU):
       ial = IAllocator(self.cfg, self.rpc,
                        mode=self.op.mode,
                        name=self.op.name,
-                       mem_size=self.op.mem_size,
+                       memory=self.op.memory,
                        disks=self.op.disks,
                        disk_template=self.op.disk_template,
                        os=self.op.os,
@@ -12296,6 +12761,16 @@ class LUTestAllocator(NoHooksLU):
       ial = IAllocator(self.cfg, self.rpc,
                        mode=self.op.mode,
                        evac_nodes=self.op.evac_nodes)
+    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
+      ial = IAllocator(self.cfg, self.rpc,
+                       mode=self.op.mode,
+                       instances=self.op.instances,
+                       target_groups=self.op.target_groups)
+    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
+      ial = IAllocator(self.cfg, self.rpc,
+                       mode=self.op.mode,
+                       instances=self.op.instances,
+                       evac_mode=self.op.evac_mode)
     else:
       raise errors.ProgrammerError("Uncatched mode %s in"
                                    " LUTestAllocator.Exec", self.op.mode)