Merge branch 'stable-2.1' into devel-2.1

[ganeti-local] / lib / cmdlib.py
diff --git a/lib/cmdlib.py b/lib/cmdlib.py

index c054e6b..cb922eb 100644 (file)
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -21,7 +21,10 @@
  
  """Module implementing the master-side code."""
  
-# pylint: disable-msg=W0613,W0201
+# pylint: disable-msg=W0201
+
+# W0201 is disabled since most LU attributes are defined in CheckPrereq
+# or similar functions
  
  import os
  import os.path
@@ -87,9 +90,9 @@ class LogicalUnit(object):
      self.recalculate_locks = {}
      self.__ssh = None
      # logging
-    self.LogWarning = processor.LogWarning
-    self.LogInfo = processor.LogInfo
-    self.LogStep = processor.LogStep
+    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
+    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
+    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
      # support for dry-run
      self.dry_run_result = None
  
@@ -277,6 +280,9 @@ class LogicalUnit(object):
          and hook results
  
      """
+    # API must be kept, thus we ignore the "unused argument" and "could
+    # be a function" warnings
+    # pylint: disable-msg=W0613,R0201
      return lu_result
  
    def _ExpandAndLockInstance(self):
@@ -347,7 +353,7 @@ class LogicalUnit(object):
      del self.recalculate_locks[locking.LEVEL_NODE]
  
  
-class NoHooksLU(LogicalUnit):
+class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
    """Simple LU which runs no hooks.
  
    This LU is intended as a parent for other LogicalUnits which will
@@ -357,6 +363,14 @@ class NoHooksLU(LogicalUnit):
    HPATH = None
    HTYPE = None
  
+  def BuildHooksEnv(self):
+    """Empty BuildHooksEnv for NoHooksLU.
+
+    This just raises an error.
+
+    """
+    assert False, "BuildHooksEnv called for NoHooksLUs"
+
  
  class Tasklet:
    """Tasklet base class.
@@ -500,6 +514,21 @@ def _CheckBooleanOpField(op, name):
    setattr(op, name, val)
  
  
+def _CheckGlobalHvParams(params):
+  """Validates that given hypervisor params are not global ones.
+
+  This will ensure that instances don't get customised versions of
+  global params.
+
+  """
+  used_globals = constants.HVC_GLOBALS.intersection(params)
+  if used_globals:
+    msg = ("The following hypervisor parameters are global and cannot"
+           " be customized at instance level, please modify them at"
+           " cluster level: %s" % utils.CommaJoin(used_globals))
+    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
+
+
  def _CheckNodeOnline(lu, node):
    """Ensure that a given node is online.
  
@@ -673,7 +702,7 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
    }
    if override:
      args.update(override)
-  return _BuildInstanceHookEnv(**args)
+  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
  
  
  def _AdjustCandidatePool(lu, exceptions):
@@ -683,7 +712,7 @@ def _AdjustCandidatePool(lu, exceptions):
    mod_list = lu.cfg.MaintainCandidatePool(exceptions)
    if mod_list:
      lu.LogInfo("Promoted nodes to master candidate role: %s",
-               ", ".join(node.name for node in mod_list))
+               utils.CommaJoin(node.name for node in mod_list))
      for name in mod_list:
        lu.context.ReaddNode(name)
    mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
@@ -883,6 +912,7 @@ class LUDestroyCluster(LogicalUnit):
      try:
        hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
      except:
+      # pylint: disable-msg=W0702
        self.LogWarning("Errors occurred running hooks on %s" % master)
  
      result = self.rpc.call_node_stop_master(master, False)
@@ -929,6 +959,7 @@ class LUVerifyCluster(LogicalUnit):
    ENODESSH = (TNODE, "ENODESSH")
    ENODEVERSION = (TNODE, "ENODEVERSION")
    ENODESETUP = (TNODE, "ENODESETUP")
+  ENODETIME = (TNODE, "ENODETIME")
  
    ETYPE_FIELD = "code"
    ETYPE_ERROR = "ERROR"
@@ -1002,7 +1033,7 @@ class LUVerifyCluster(LogicalUnit):
  
      """
      node = nodeinfo.name
-    _ErrorIf = self._ErrorIf
+    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
  
      # main result, node_result should be a non-empty dict
      test = not node_result or not isinstance(node_result, dict)
@@ -1136,7 +1167,7 @@ class LUVerifyCluster(LogicalUnit):
          # check that ':' is not present in PV names, since it's a
          # special character for lvcreate (denotes the range of PEs to
          # use on the PV)
-        for size, pvname, owner_vg in pvlist:
+        for _, pvname, owner_vg in pvlist:
            test = ":" in pvname
            _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                     " '%s' of VG '%s'", pvname, owner_vg)
@@ -1149,7 +1180,7 @@ class LUVerifyCluster(LogicalUnit):
      available on the instance's node.
  
      """
-    _ErrorIf = self._ErrorIf
+    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
      node_current = instanceconfig.primary_node
  
      node_vol_should = {}
@@ -1264,7 +1295,7 @@ class LUVerifyCluster(LogicalUnit):
  
      """
      self.bad = False
-    _ErrorIf = self._ErrorIf
+    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
      verbose = self.op.verbose
      self._feedback_fn = feedback_fn
      feedback_fn("* Verifying global settings")
@@ -1311,14 +1342,23 @@ class LUVerifyCluster(LogicalUnit):
        constants.NV_VERSION: None,
        constants.NV_HVINFO: self.cfg.GetHypervisorType(),
        constants.NV_NODESETUP: None,
+      constants.NV_TIME: None,
        }
+
      if vg_name is not None:
        node_verify_param[constants.NV_VGLIST] = None
        node_verify_param[constants.NV_LVLIST] = vg_name
        node_verify_param[constants.NV_PVLIST] = [vg_name]
        node_verify_param[constants.NV_DRBDLIST] = None
+
+    # Due to the way our RPC system works, exact response times cannot be
+    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
+    # time before and after executing the request, we can at least have a time
+    # window.
+    nvinfo_starttime = time.time()
      all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                             self.cfg.GetClusterName())
+    nvinfo_endtime = time.time()
  
      cluster = self.cfg.GetClusterInfo()
      master_node = self.cfg.GetMasterNode()
@@ -1365,6 +1405,7 @@ class LUVerifyCluster(LogicalUnit):
          else:
            instance = instanceinfo[instance]
            node_drbd[minor] = (instance.name, instance.admin_up)
+
        self._VerifyNode(node_i, file_names, local_checksums,
                         nresult, master_files, node_drbd, vg_name)
  
@@ -1398,6 +1439,27 @@ class LUVerifyCluster(LogicalUnit):
        if test:
          continue
  
+      # Node time
+      ntime = nresult.get(constants.NV_TIME, None)
+      try:
+        ntime_merged = utils.MergeTime(ntime)
+      except (ValueError, TypeError):
+        _ErrorIf(test, self.ENODETIME, node, "Node returned invalid time")
+
+      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
+        ntime_diff = abs(nvinfo_starttime - ntime_merged)
+      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
+        ntime_diff = abs(ntime_merged - nvinfo_endtime)
+      else:
+        ntime_diff = None
+
+      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
+               "Node time diverges by at least %0.1fs from master node time",
+               ntime_diff)
+
+      if ntime_diff is not None:
+        continue
+
        try:
          node_info[node] = {
            "mfree": int(nodeinfo['memory_free']),
@@ -1483,7 +1545,7 @@ class LUVerifyCluster(LogicalUnit):
        # warn that the instance lives on offline nodes
        _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
                 "instance lives on offline node(s) %s",
-               ", ".join(inst_nodes_offline))
+               utils.CommaJoin(inst_nodes_offline))
  
      feedback_fn("* Verifying orphan volumes")
      self._VerifyOrphanVolumes(node_vol_should, node_volume)
@@ -1536,13 +1598,13 @@ class LUVerifyCluster(LogicalUnit):
        assert hooks_results, "invalid result from hooks"
  
        for node_name in hooks_results:
-        show_node_header = True
          res = hooks_results[node_name]
          msg = res.fail_msg
          test = msg and not res.offline
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Communication failure in hooks execution: %s", msg)
-        if test:
+        if res.offline or msg:
+          # No need to investigate payload if node is offline or gave an error.
            # override manually lu_result here as _ErrorIf only
            # overrides self.bad
            lu_result = 1
@@ -1626,7 +1688,7 @@ class LUVerifyDisks(NoHooksLU):
          continue
  
        lvs = node_res.payload
-      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
+      for lv_name, (_, _, lv_online) in lvs.items():
          inst = nv_dict.pop((node, lv_name), None)
          if (not lv_online and inst is not None
              and inst.name not in res_instances):
@@ -1782,13 +1844,14 @@ class LURenameCluster(LogicalUnit):
        "NEW_NAME": self.op.name,
        }
      mn = self.cfg.GetMasterNode()
-    return env, [mn], [mn]
+    all_nodes = self.cfg.GetNodeList()
+    return env, [mn], all_nodes
  
    def CheckPrereq(self):
      """Verify that the passed name is a valid one.
  
      """
-    hostname = utils.HostInfo(self.op.name)
+    hostname = utils.GetHostInfo(self.op.name)
  
      new_name = hostname.name
      self.ip = new_ip = hostname.ip
@@ -1955,6 +2018,29 @@ class LUSetClusterParams(LogicalUnit):
        self.new_nicparams = objects.FillDict(
          cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
        objects.NIC.CheckParameterSyntax(self.new_nicparams)
+      nic_errors = []
+
+      # check all instances for consistency
+      for instance in self.cfg.GetAllInstancesInfo().values():
+        for nic_idx, nic in enumerate(instance.nics):
+          params_copy = copy.deepcopy(nic.nicparams)
+          params_filled = objects.FillDict(self.new_nicparams, params_copy)
+
+          # check parameter syntax
+          try:
+            objects.NIC.CheckParameterSyntax(params_filled)
+          except errors.ConfigurationError, err:
+            nic_errors.append("Instance %s, nic/%d: %s" %
+                              (instance.name, nic_idx, err))
+
+          # if we're moving instances to routed, check that they have an ip
+          target_mode = params_filled[constants.NIC_MODE]
+          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
+            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
+                              (instance.name, nic_idx))
+      if nic_errors:
+        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
+                                   "\n".join(nic_errors))
  
      # hypervisor list/parameters
      self.new_hvparams = objects.FillDict(cluster.hvparams, {})
@@ -1977,7 +2063,8 @@ class LUSetClusterParams(LogicalUnit):
        invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
        if invalid_hvs:
          raise errors.OpPrereqError("Enabled hypervisors contains invalid"
-                                   " entries: %s" % " ,".join(invalid_hvs),
+                                   " entries: %s" %
+                                   utils.CommaJoin(invalid_hvs),
                                     errors.ECODE_INVAL)
      else:
        self.hv_list = cluster.enabled_hypervisors
@@ -2096,7 +2183,7 @@ class LURedistributeConfig(NoHooksLU):
      _RedistributeAncillaryFiles(self)
  
  
-def _WaitForSync(lu, instance, oneshot=False, unlock=False):
+def _WaitForSync(lu, instance, oneshot=False):
    """Sleep and poll for an instance's disk to sync.
  
    """
@@ -2236,10 +2323,9 @@ class LUDiagnoseOS(NoHooksLU):
      """
  
    @staticmethod
-  def _DiagnoseByOS(node_list, rlist):
+  def _DiagnoseByOS(rlist):
      """Remaps a per-node return list into an a per-os per-node dictionary
  
-    @param node_list: a list with the names of all nodes
      @param rlist: a map with node names as keys and OS objects as values
  
      @rtype: dict
@@ -2277,7 +2363,7 @@ class LUDiagnoseOS(NoHooksLU):
      """
      valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
      node_data = self.rpc.call_os_diagnose(valid_nodes)
-    pol = self._DiagnoseByOS(valid_nodes, node_data)
+    pol = self._DiagnoseByOS(node_data)
      output = []
      calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
      calc_variants = "variants" in self.op.output_fields
@@ -2339,8 +2425,11 @@ class LURemoveNode(LogicalUnit):
        "NODE_NAME": self.op.node_name,
        }
      all_nodes = self.cfg.GetNodeList()
-    if self.op.node_name in all_nodes:
+    try:
        all_nodes.remove(self.op.node_name)
+    except ValueError:
+      logging.warning("Node %s which is about to be removed not found"
+                      " in the all nodes list", self.op.node_name)
      return env, all_nodes, all_nodes
  
    def CheckPrereq(self):
@@ -2393,8 +2482,9 @@ class LURemoveNode(LogicalUnit):
      # Run post hooks on the node before it's removed
      hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
      try:
-      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
+      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
      except:
+      # pylint: disable-msg=W0702
        self.LogWarning("Errors occurred running hooks on %s" % node.name)
  
      result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
@@ -2408,6 +2498,7 @@ class LUQueryNodes(NoHooksLU):
    """Logical unit for querying nodes.
  
    """
+  # pylint: disable-msg=W0142
    _OP_REQP = ["output_fields", "names", "use_locking"]
    REQ_BGL = False
  
@@ -2508,10 +2599,9 @@ class LUQueryNodes(NoHooksLU):
      inst_fields = frozenset(("pinst_cnt", "pinst_list",
                               "sinst_cnt", "sinst_list"))
      if inst_fields & frozenset(self.op.output_fields):
-      instancelist = self.cfg.GetInstanceList()
+      inst_data = self.cfg.GetAllInstancesInfo()
  
-      for instance_name in instancelist:
-        inst = self.cfg.GetInstanceInfo(instance_name)
+      for inst in inst_data.values():
          if inst.primary_node in node_to_primary:
            node_to_primary[inst.primary_node].add(inst.name)
          for secnode in inst.secondary_nodes:
@@ -2843,7 +2933,7 @@ class LUAddNode(LogicalUnit):
      node_name = self.op.node_name
      cfg = self.cfg
  
-    dns_data = utils.HostInfo(node_name)
+    dns_data = utils.GetHostInfo(node_name)
  
      node = dns_data.name
      primary_ip = self.op.primary_ip = dns_data.ip
@@ -2939,7 +3029,7 @@ class LUAddNode(LogicalUnit):
      # later in the procedure; this also means that if the re-add
      # fails, we are left with a non-offlined, broken node
      if self.op.readd:
-      new_node.drained = new_node.offline = False
+      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
        self.LogInfo("Readding a node, the offline/drained flags were reset")
        # if we demote the node, we do cleanup later in the procedure
        new_node.master_candidate = self.master_candidate
@@ -3022,7 +3112,7 @@ class LUAddNode(LogicalUnit):
                            " candidate status: %s" % msg)
      else:
        _RedistributeAncillaryFiles(self, additional_nodes=[node])
-      self.context.AddNode(new_node)
+      self.context.AddNode(new_node, self.proc.GetECId())
  
  
  class LUSetNodeParams(LogicalUnit):
@@ -3117,7 +3207,7 @@ class LUSetNodeParams(LogicalUnit):
  
      # If we're being deofflined/drained, we'll MC ourself if needed
      if (deoffline_or_drain and not offline_or_drain and not
-        self.op.master_candidate == True):
+        self.op.master_candidate == True and not node.master_candidate):
        self.op.master_candidate = _DecideSelfPromotion(self)
        if self.op.master_candidate:
          self.LogInfo("Autopromoting node to master candidate")
@@ -3903,7 +3993,8 @@ class LUReinstallInstance(LogicalUnit):
      _StartInstanceDisks(self, inst, None)
      try:
        feedback_fn("Running the instance OS create scripts...")
-      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
+      # FIXME: pass debug option from opcode to backend
+      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True, 0)
        result.Raise("Could not install OS for instance %s on node %s" %
                     (inst.name, inst.primary_node))
      finally:
@@ -3986,7 +4077,7 @@ class LURecreateInstanceDisks(LogicalUnit):
  
      """
      to_skip = []
-    for idx, disk in enumerate(self.instance.disks):
+    for idx, _ in enumerate(self.instance.disks):
        if idx not in self.op.disks: # disk idx has not been passed in
          to_skip.append(idx)
          continue
@@ -4041,7 +4132,7 @@ class LURenameInstance(LogicalUnit):
      self.instance = instance
  
      # new name verification
-    name_info = utils.HostInfo(self.op.new_name)
+    name_info = utils.GetHostInfo(self.op.new_name)
  
      self.op.new_name = new_name = name_info.name
      instance_list = self.cfg.GetInstanceList()
@@ -4087,7 +4178,7 @@ class LURenameInstance(LogicalUnit):
      _StartInstanceDisks(self, inst, None)
      try:
        result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
-                                                 old_name)
+                                                 old_name, 0)
        msg = result.fail_msg
        if msg:
          msg = ("Could not run OS rename script for instance %s on node %s"
@@ -4181,6 +4272,7 @@ class LUQueryInstances(NoHooksLU):
    """Logical unit for querying instances.
  
    """
+  # pylint: disable-msg=W0142
    _OP_REQP = ["output_fields", "names", "use_locking"]
    REQ_BGL = False
    _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
@@ -4200,7 +4292,8 @@ class LUQueryInstances(NoHooksLU):
                                      "hvparams",
                                      ] + _SIMPLE_FIELDS +
                                    ["hv/%s" % name
-                                   for name in constants.HVS_PARAMETERS] +
+                                   for name in constants.HVS_PARAMETERS
+                                   if name not in constants.HVC_GLOBALS] +
                                    ["be/%s" % name
                                     for name in constants.BES_PARAMETERS])
    _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
@@ -4241,6 +4334,8 @@ class LUQueryInstances(NoHooksLU):
      """Computes the list of nodes and their attributes.
  
      """
+    # pylint: disable-msg=R0912
+    # way too many branches here
      all_info = self.cfg.GetAllInstancesInfo()
      if self.wanted == locking.ALL_SET:
        # caller didn't specify instance names, so ordering is not important
@@ -4295,7 +4390,7 @@ class LUQueryInstances(NoHooksLU):
      cluster = self.cfg.GetClusterInfo()
      for instance in instance_list:
        iout = []
-      i_hv = cluster.FillHV(instance)
+      i_hv = cluster.FillHV(instance, skip_globals=True)
        i_be = cluster.FillBE(instance)
        i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                   nic.nicparams) for nic in instance.nics]
@@ -4382,7 +4477,8 @@ class LUQueryInstances(NoHooksLU):
          elif field == "hvparams":
            val = i_hv
          elif (field.startswith(HVPREFIX) and
-              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
+              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
+              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
            val = i_hv.get(field[len(HVPREFIX):], None)
          elif field == "beparams":
            val = i_be
@@ -4712,7 +4808,7 @@ class LUMoveInstance(LogicalUnit):
      for idx, dsk in enumerate(instance.disks):
        if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
          raise errors.OpPrereqError("Instance disk %d has a complex layout,"
-                                   " cannot copy", errors.ECODE_STATE)
+                                   " cannot copy" % idx, errors.ECODE_STATE)
  
      _CheckNodeOnline(self, target_node)
      _CheckNodeNotDrained(self, target_node)
@@ -5147,6 +5243,7 @@ class TLMigrateInstance(Tasklet):
      if msg:
        logging.error("Instance pre-migration failed, trying to revert"
                      " disk status: %s", msg)
+      self.feedback_fn("Pre-migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
@@ -5161,6 +5258,7 @@ class TLMigrateInstance(Tasklet):
      if msg:
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
+      self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
@@ -5292,7 +5390,7 @@ def _GenerateUniqueNames(lu, exts):
    """
    results = []
    for val in exts:
-    new_id = lu.cfg.GenerateUniqueID()
+    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
      results.append("%s%s" % (new_id, val))
    return results
  
@@ -5304,7 +5402,7 @@ def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
    """
    port = lu.cfg.AllocatePort()
    vgname = lu.cfg.GetVGName()
-  shared_secret = lu.cfg.GenerateDRBDSecret()
+  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
    dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                            logical_id=(vgname, names[0]))
    dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
@@ -5544,6 +5642,19 @@ class LUCreateInstance(LogicalUnit):
                "hvparams", "beparams"]
    REQ_BGL = False
  
+  def CheckArguments(self):
+    """Check arguments.
+
+    """
+    # do not require name_check to ease forward/backward compatibility
+    # for tools
+    if not hasattr(self.op, "name_check"):
+      self.op.name_check = True
+    if self.op.ip_check and not self.op.name_check:
+      # TODO: make the ip check more flexible and not depend on the name check
+      raise errors.OpPrereqError("Cannot do ip checks without a name check",
+                                 errors.ECODE_INVAL)
+
    def _ExpandNode(self, node):
      """Expands and checks one node name.
  
@@ -5597,6 +5708,8 @@ class LUCreateInstance(LogicalUnit):
      hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      self.hv_full = filled_hvp
+    # check that we don't specify global parameters on an instance
+    _CheckGlobalHvParams(self.op.hvparams)
  
      # fill and remember the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
@@ -5606,8 +5719,14 @@ class LUCreateInstance(LogicalUnit):
      #### instance parameters check
  
      # instance name verification
-    hostname1 = utils.HostInfo(self.op.instance_name)
-    self.op.instance_name = instance_name = hostname1.name
+    if self.op.name_check:
+      hostname1 = utils.GetHostInfo(self.op.instance_name)
+      self.op.instance_name = instance_name = hostname1.name
+      # used in CheckPrereq for ip ping check
+      self.check_ip = hostname1.ip
+    else:
+      instance_name = self.op.instance_name
+      self.check_ip = None
  
      # this is just a preventive check, but someone might still add this
      # instance in the meantime, and creation will fail at lock-add time
@@ -5636,6 +5755,10 @@ class LUCreateInstance(LogicalUnit):
        if ip is None or ip.lower() == constants.VALUE_NONE:
          nic_ip = None
        elif ip.lower() == constants.VALUE_AUTO:
+        if not self.op.name_check:
+          raise errors.OpPrereqError("IP address set to auto but name checks"
+                                     " have been skipped. Aborting.",
+                                     errors.ECODE_INVAL)
          nic_ip = hostname1.ip
        else:
          if not utils.IsValidIP(ip):
@@ -5644,7 +5767,7 @@ class LUCreateInstance(LogicalUnit):
                                       errors.ECODE_INVAL)
          nic_ip = ip
  
-      # TODO: check the ip for uniqueness !!
+      # TODO: check the ip address for uniqueness
        if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
          raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                     errors.ECODE_INVAL)
@@ -5652,15 +5775,14 @@ class LUCreateInstance(LogicalUnit):
        # MAC address verification
        mac = nic.get("mac", constants.VALUE_AUTO)
        if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
-        if not utils.IsValidMac(mac.lower()):
-          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
-                                     mac, errors.ECODE_INVAL)
-        else:
-          # or validate/reserve the current one
-          if self.cfg.IsMacInUse(mac):
-            raise errors.OpPrereqError("MAC address %s already in use"
-                                       " in cluster" % mac,
-                                       errors.ECODE_NOTUNIQUE)
+        mac = utils.NormalizeAndValidateMac(mac)
+
+        try:
+          self.cfg.ReserveMAC(mac, self.proc.GetECId())
+        except errors.ReservationError:
+          raise errors.OpPrereqError("MAC address %s already in use"
+                                     " in cluster" % mac,
+                                     errors.ECODE_NOTUNIQUE)
  
        # bridge verification
        bridge = nic.get("bridge", None)
@@ -5697,14 +5819,11 @@ class LUCreateInstance(LogicalUnit):
          raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
        try:
          size = int(size)
-      except ValueError:
+      except (TypeError, ValueError):
          raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                     errors.ECODE_INVAL)
        self.disks.append({"size": size, "mode": mode})
  
-    # used in CheckPrereq for ip ping check
-    self.check_ip = hostname1.ip
-
      # file storage checks
      if (self.op.file_driver and
          not self.op.file_driver in constants.FILE_DRIVER):
@@ -5798,7 +5917,7 @@ class LUCreateInstance(LogicalUnit):
      self.op.pnode = ial.nodes[0]
      self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                   self.op.instance_name, self.op.iallocator,
-                 ", ".join(ial.nodes))
+                 utils.CommaJoin(ial.nodes))
      if ial.required_nodes == 2:
        self.op.snode = ial.nodes[1]
  
@@ -5915,12 +6034,8 @@ class LUCreateInstance(LogicalUnit):
              nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
  
      # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
-    # ip ping checks (we use the same ip that was resolved in ExpandNames)
-    if self.op.start and not self.op.ip_check:
-      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
-                                 " adding an instance in start mode",
-                                 errors.ECODE_INVAL)
  
+    # ip ping checks (we use the same ip that was resolved in ExpandNames)
      if self.op.ip_check:
        if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
          raise errors.OpPrereqError("IP %s of instance %s already in use" %
@@ -5937,7 +6052,7 @@ class LUCreateInstance(LogicalUnit):
      # creation job will fail.
      for nic in self.nics:
        if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
-        nic.mac = self.cfg.GenerateMAC()
+        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
  
      #### allocator run
  
@@ -6076,7 +6191,8 @@ class LUCreateInstance(LogicalUnit):
  
      feedback_fn("adding instance %s to cluster config" % instance)
  
-    self.cfg.AddInstance(iobj)
+    self.cfg.AddInstance(iobj, self.proc.GetECId())
+
      # Declare that we don't want to remove the instance lock anymore, as we've
      # added the instance to the config
      del self.remove_locks[locking.LEVEL_INSTANCE]
@@ -6115,7 +6231,8 @@ class LUCreateInstance(LogicalUnit):
      if iobj.disk_template != constants.DT_DISKLESS:
        if self.op.mode == constants.INSTANCE_CREATE:
          feedback_fn("* running the instance OS create scripts...")
-        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
+        # FIXME: pass debug option from opcode to backend
+        result = self.rpc.call_instance_os_add(pnode_name, iobj, False, 0)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))
  
@@ -6124,9 +6241,10 @@ class LUCreateInstance(LogicalUnit):
          src_node = self.op.src_node
          src_images = self.src_images
          cluster_name = self.cfg.GetClusterName()
+        # FIXME: pass debug option from opcode to backend
          import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                           src_node, src_images,
-                                                         cluster_name)
+                                                         cluster_name, 0)
          msg = import_result.fail_msg
          if msg:
            self.LogWarning("Error while importing the disk images for instance"
@@ -6245,7 +6363,7 @@ class LUReplaceDisks(LogicalUnit):
  
      self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                     self.op.iallocator, self.op.remote_node,
-                                   self.op.disks)
+                                   self.op.disks, False)
  
      self.tasklets = [self.replacer]
  
@@ -6337,7 +6455,8 @@ class LUEvacuateNode(LogicalUnit):
        names.append(inst.name)
  
        replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
-                                self.op.iallocator, self.op.remote_node, [])
+                                self.op.iallocator, self.op.remote_node, [],
+                                True)
        tasklets.append(replacer)
  
      self.tasklets = tasklets
@@ -6379,7 +6498,7 @@ class TLReplaceDisks(Tasklet):
  
    """
    def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
-               disks):
+               disks, delay_iallocator):
      """Initializes this class.
  
      """
@@ -6391,6 +6510,7 @@ class TLReplaceDisks(Tasklet):
      self.iallocator_name = iallocator_name
      self.remote_node = remote_node
      self.disks = disks
+    self.delay_iallocator = delay_iallocator
  
      # Runtime data
      self.instance = None
@@ -6442,7 +6562,8 @@ class TLReplaceDisks(Tasklet):
      if len(ial.nodes) != ial.required_nodes:
        raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                   " of nodes (%s), required %s" %
-                                 (len(ial.nodes), ial.required_nodes),
+                                 (iallocator_name,
+                                  len(ial.nodes), ial.required_nodes),
                                   errors.ECODE_FAULT)
  
      remote_node_name = ial.nodes[0]
@@ -6476,6 +6597,19 @@ class TLReplaceDisks(Tasklet):
                                   len(instance.secondary_nodes),
                                   errors.ECODE_FAULT)
  
+    if not self.delay_iallocator:
+      self._CheckPrereq2()
+
+  def _CheckPrereq2(self):
+    """Check prerequisites, second part.
+
+    This function should always be part of CheckPrereq. It was separated and is
+    called from Exec instead when delay_iallocator is set, because during node
+    evacuation iallocator was only called with an unmodified cluster model, not
+    taking planned changes into account.
+
+    """
+    instance = self.instance
      secondary_node = instance.secondary_nodes[0]
  
      if self.iallocator_name is None:
@@ -6579,12 +6713,15 @@ class TLReplaceDisks(Tasklet):
      This dispatches the disk replacement to the appropriate handler.
  
      """
+    if self.delay_iallocator:
+      self._CheckPrereq2()
+
      if not self.disks:
        feedback_fn("No disks need replacement")
        return
  
      feedback_fn("Replacing disk(s) %s for %s" %
-                (", ".join([str(i) for i in self.disks]), self.instance.name))
+                (utils.CommaJoin(self.disks), self.instance.name))
  
      activate_disks = (not self.instance.admin_up)
  
@@ -6689,7 +6826,7 @@ class TLReplaceDisks(Tasklet):
      return iv_names
  
    def _CheckDevices(self, node_name, iv_names):
-    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
+    for name, (dev, _, _) in iv_names.iteritems():
        self.cfg.SetDiskID(dev, node_name)
  
        result = self.rpc.call_blockdev_find(node_name, dev)
@@ -6705,7 +6842,7 @@ class TLReplaceDisks(Tasklet):
          raise errors.OpExecError("DRBD device %s is degraded!" % name)
  
    def _RemoveOldStorage(self, node_name, iv_names):
-    for name, (dev, old_lvs, _) in iv_names.iteritems():
+    for name, (_, old_lvs, _) in iv_names.iteritems():
        self.lu.LogInfo("Remove logical volumes for %s" % name)
  
        for lv in old_lvs:
@@ -6830,7 +6967,7 @@ class TLReplaceDisks(Tasklet):
      # This can fail as the old devices are degraded and _WaitForSync
      # does a combined result over all disks, so we don't check its return value
      self.lu.LogStep(5, steps_total, "Sync devices")
-    _WaitForSync(self.lu, self.instance, unlock=True)
+    _WaitForSync(self.lu, self.instance)
  
      # Check all devices manually
      self._CheckDevices(self.instance.primary_node, iv_names)
@@ -6900,6 +7037,7 @@ class TLReplaceDisks(Tasklet):
        if self.instance.primary_node == o_node1:
          p_minor = o_minor1
        else:
+        assert self.instance.primary_node == o_node2, "Three-node instance?"
          p_minor = o_minor2
  
        new_alone_id = (self.instance.primary_node, self.new_node, None,
@@ -6975,7 +7113,7 @@ class TLReplaceDisks(Tasklet):
      # This can fail as the old devices are degraded and _WaitForSync
      # does a combined result over all disks, so we don't check its return value
      self.lu.LogStep(5, steps_total, "Sync devices")
-    _WaitForSync(self.lu, self.instance, unlock=True)
+    _WaitForSync(self.lu, self.instance)
  
      # Check all devices manually
      self._CheckDevices(self.instance.primary_node, iv_names)
@@ -7135,6 +7273,14 @@ class LUGrowDisk(LogicalUnit):
        self.cfg.SetDiskID(disk, node)
        result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
        result.Raise("Grow request failed to node %s" % node)
+
+      # TODO: Rewrite code to work properly
+      # DRBD goes into sync mode for a short amount of time after executing the
+      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
+      # calling "resize" in sync mode fails. Sleeping for a short amount of
+      # time is a work-around.
+      time.sleep(5)
+
      disk.RecordGrow(self.op.amount)
      self.cfg.Update(instance, feedback_fn)
      if self.op.wait_for_sync:
@@ -7290,7 +7436,7 @@ class LUQueryInstanceData(NoHooksLU):
          "hypervisor": instance.hypervisor,
          "network_port": instance.network_port,
          "hv_instance": instance.hvparams,
-        "hv_actual": cluster.FillHV(instance),
+        "hv_actual": cluster.FillHV(instance, skip_globals=True),
          "be_instance": instance.beparams,
          "be_actual": cluster.FillBE(instance),
          "serial_no": instance.serial_no,
@@ -7327,6 +7473,9 @@ class LUSetInstanceParams(LogicalUnit):
              self.op.hvparams or self.op.beparams):
        raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
  
+    if self.op.hvparams:
+      _CheckGlobalHvParams(self.op.hvparams)
+
      # Disk validation
      disk_addremove = 0
      for disk_op, disk_dict in self.op.disks:
@@ -7353,7 +7502,7 @@ class LUSetInstanceParams(LogicalUnit):
                                       errors.ECODE_INVAL)
          try:
            size = int(size)
-        except ValueError, err:
+        except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                       str(err), errors.ECODE_INVAL)
          disk_dict['size'] = size
@@ -7410,9 +7559,8 @@ class LUSetInstanceParams(LogicalUnit):
        if 'mac' in nic_dict:
          nic_mac = nic_dict['mac']
          if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
-          if not utils.IsValidMac(nic_mac):
-            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac,
-                                       errors.ECODE_INVAL)
+          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
+
          if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
            raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                       " modifying an existing nic",
@@ -7482,7 +7630,8 @@ class LUSetInstanceParams(LogicalUnit):
      nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
      return env, nl, nl
  
-  def _GetUpdatedParams(self, old_params, update_dict,
+  @staticmethod
+  def _GetUpdatedParams(old_params, update_dict,
                          default_values, parameter_types):
      """Return the new params dict for the given params.
  
@@ -7622,10 +7771,14 @@ class LUSetInstanceParams(LogicalUnit):
          continue
        if nic_op != constants.DDM_ADD:
          # an existing nic
+        if not instance.nics:
+          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
+                                     " no NICs" % nic_op,
+                                     errors.ECODE_INVAL)
          if nic_op < 0 or nic_op >= len(instance.nics):
            raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                       " are 0 to %d" %
-                                     (nic_op, len(instance.nics)),
+                                     (nic_op, len(instance.nics) - 1),
                                       errors.ECODE_INVAL)
          old_nic_params = instance.nics[nic_op].nicparams
          old_nic_ip = instance.nics[nic_op].ip
@@ -7673,10 +7826,12 @@ class LUSetInstanceParams(LogicalUnit):
                                       errors.ECODE_INVAL)
          elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
            # otherwise generate the mac
-          nic_dict['mac'] = self.cfg.GenerateMAC()
+          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
          else:
            # or validate/reserve the current one
-          if self.cfg.IsMacInUse(nic_mac):
+          try:
+            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
+          except errors.ReservationError:
              raise errors.OpPrereqError("MAC address %s already in use"
                                         " in cluster" % nic_mac,
                                         errors.ECODE_NOTUNIQUE)
@@ -7686,7 +7841,7 @@ class LUSetInstanceParams(LogicalUnit):
        raise errors.OpPrereqError("Disk operations not supported for"
                                   " diskless instances",
                                   errors.ECODE_INVAL)
-    for disk_op, disk_dict in self.op.disks:
+    for disk_op, _ in self.op.disks:
        if disk_op == constants.DDM_REMOVE:
          if len(instance.disks) == 1:
            raise errors.OpPrereqError("Cannot remove the last disk of"
@@ -7730,7 +7885,6 @@ class LUSetInstanceParams(LogicalUnit):
  
      result = []
      instance = self.instance
-    cluster = self.cluster
      # disk changes
      for disk_op, disk_dict in self.op.disks:
        if disk_op == constants.DDM_REMOVE:
@@ -7805,8 +7959,8 @@ class LUSetInstanceParams(LogicalUnit):
          for key in 'mac', 'ip':
            if key in nic_dict:
              setattr(instance.nics[nic_op], key, nic_dict[key])
-        if nic_op in self.nic_pnew:
-          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
+        if nic_op in self.nic_pinst:
+          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
          for key, val in nic_dict.iteritems():
            result.append(("nic.%s/%d" % (key, nic_op), val))
  
@@ -8016,8 +8170,10 @@ class LUExportInstance(LogicalUnit):
          feedback_fn("Exporting snapshot %s from %s to %s" %
                      (idx, src_node, dst_node.name))
          if dev:
+          # FIXME: pass debug from opcode to backend
            result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
-                                                 instance, cluster_name, idx)
+                                                 instance, cluster_name,
+                                                 idx, 0)
            msg = result.fail_msg
            if msg:
              self.LogWarning("Could not export disk/%s from node %s to"
@@ -8121,7 +8277,7 @@ class LURemoveExport(NoHooksLU):
                    " Domain Name.")
  
  
-class TagsLU(NoHooksLU):
+class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
    """Generic tags LU.
  
    This is an abstract class which is the parent of all the other tags LUs.
@@ -8332,6 +8488,8 @@ class IAllocator(object):
        easy usage
  
    """
+  # pylint: disable-msg=R0902
+  # lots of instance attributes
    _ALLO_KEYS = [
      "mem_size", "disks", "disk_template",
      "os", "tags", "nics", "vcpus", "hypervisor",