Add error code for temporary lack of resources
[ganeti-local] / lib / cmdlib.py
index e029450..d29631a 100644 (file)
@@ -138,13 +138,18 @@ class LogicalUnit(object):
     self.owned_locks = context.glm.list_owned
     self.context = context
     self.rpc = rpc_runner
-    # Dicts used to declare locking needs to mcpu
+
+    # Dictionaries used to declare locking needs to mcpu
     self.needed_locks = None
     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
+    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
+
     self.add_locks = {}
     self.remove_locks = {}
+
     # Used to force good behavior when calling helper functions
     self.recalculate_locks = {}
+
     # logging
     self.Log = processor.Log # pylint: disable=C0103
     self.LogWarning = processor.LogWarning # pylint: disable=C0103
@@ -2228,6 +2233,11 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       locking.LEVEL_INSTANCE: inst_names,
       locking.LEVEL_NODEGROUP: [self.group_uuid],
       locking.LEVEL_NODE: [],
+
+      # This opcode is run by watcher every five minutes and acquires all nodes
+      # for a group. It doesn't run for a long time, so it's better to acquire
+      # the node allocation lock as well.
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
       }
 
     self.share_locks = _ShareAll()
@@ -3675,6 +3685,12 @@ class LUGroupVerifyDisks(NoHooksLU):
       locking.LEVEL_INSTANCE: [],
       locking.LEVEL_NODEGROUP: [],
       locking.LEVEL_NODE: [],
+
+      # This opcode acquires all node locks in a group. LUClusterVerifyDisks
+      # starts one instance of this opcode for every group, which means all
+      # nodes will be locked for a short amount of time, so it's better to
+      # acquire the node allocation lock as well.
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
       }
 
   def DeclareLocks(self, level):
@@ -3781,6 +3797,8 @@ class LUClusterRepairDiskSizes(NoHooksLU):
   def ExpandNames(self):
     if self.op.instances:
       self.wanted_names = _GetWantedInstances(self, self.op.instances)
+      # Not getting the node allocation lock as only a specific set of
+      # instances (and their nodes) is going to be acquired
       self.needed_locks = {
         locking.LEVEL_NODE_RES: [],
         locking.LEVEL_INSTANCE: self.wanted_names,
@@ -3791,10 +3809,15 @@ class LUClusterRepairDiskSizes(NoHooksLU):
       self.needed_locks = {
         locking.LEVEL_NODE_RES: locking.ALL_SET,
         locking.LEVEL_INSTANCE: locking.ALL_SET,
+
+        # This opcode acquires the node locks for all instances
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
         }
+
     self.share_locks = {
       locking.LEVEL_NODE_RES: 1,
       locking.LEVEL_INSTANCE: 0,
+      locking.LEVEL_NODE_ALLOC: 1,
       }
 
   def DeclareLocks(self, level):
@@ -4036,16 +4059,15 @@ class LUClusterSetParams(LogicalUnit):
   def ExpandNames(self):
     # FIXME: in the future maybe other cluster params won't require checking on
     # all nodes to be modified.
+    # FIXME: This opcode changes cluster-wide settings. Is acquiring all
+    # resource locks the right thing, shouldn't it be the BGL instead?
     self.needed_locks = {
       locking.LEVEL_NODE: locking.ALL_SET,
       locking.LEVEL_INSTANCE: locking.ALL_SET,
       locking.LEVEL_NODEGROUP: locking.ALL_SET,
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
     }
-    self.share_locks = {
-        locking.LEVEL_NODE: 1,
-        locking.LEVEL_INSTANCE: 1,
-        locking.LEVEL_NODEGROUP: 1,
-    }
+    self.share_locks = _ShareAll()
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -4592,8 +4614,9 @@ class LUClusterRedistConf(NoHooksLU):
   def ExpandNames(self):
     self.needed_locks = {
       locking.LEVEL_NODE: locking.ALL_SET,
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
     }
-    self.share_locks[locking.LEVEL_NODE] = 1
+    self.share_locks = _ShareAll()
 
   def Exec(self, feedback_fn):
     """Redistribute the configuration.
@@ -4791,6 +4814,11 @@ class LUOobCommand(NoHooksLU):
       locking.LEVEL_NODE: lock_names,
       }
 
+    if not self.op.node_names:
+      # Acquire node allocation lock only if all nodes are affected
+      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
+
   def CheckPrereq(self):
     """Check prerequisites.
 
@@ -5223,6 +5251,7 @@ class _NodeQuery(_QueryBase):
     if self.do_locking:
       # If any non-static field is requested we need to lock the nodes
       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
+      lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
 
   def DeclareLocks(self, lu, level):
     pass
@@ -5317,13 +5346,16 @@ class LUNodeQueryvols(NoHooksLU):
 
   def ExpandNames(self):
     self.share_locks = _ShareAll()
-    self.needed_locks = {}
 
-    if not self.op.nodes:
-      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+    if self.op.nodes:
+      self.needed_locks = {
+        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
+        }
     else:
-      self.needed_locks[locking.LEVEL_NODE] = \
-        _GetWantedNodes(self, self.op.nodes)
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+        }
 
   def Exec(self, feedback_fn):
     """Computes the list of nodes and their attributes.
@@ -5386,13 +5418,16 @@ class LUNodeQueryStorage(NoHooksLU):
 
   def ExpandNames(self):
     self.share_locks = _ShareAll()
-    self.needed_locks = {}
 
     if self.op.nodes:
-      self.needed_locks[locking.LEVEL_NODE] = \
-        _GetWantedNodes(self, self.op.nodes)
+      self.needed_locks = {
+        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
+        }
     else:
-      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+        }
 
   def Exec(self, feedback_fn):
     """Computes the list of nodes and their attributes.
@@ -6018,19 +6053,28 @@ class LUNodeSetParams(LogicalUnit):
 
   def ExpandNames(self):
     if self.lock_all:
-      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+
+        # Block allocations when all nodes are locked
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+        }
     else:
-      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
+      self.needed_locks = {
+        locking.LEVEL_NODE: self.op.node_name,
+        }
 
     # Since modifying a node can have severe effects on currently running
     # operations the resource lock is at least acquired in shared mode
     self.needed_locks[locking.LEVEL_NODE_RES] = \
       self.needed_locks[locking.LEVEL_NODE]
 
-    # Get node resource and instance locks in shared mode; they are not used
-    # for anything but read-only access
-    self.share_locks[locking.LEVEL_NODE_RES] = 1
-    self.share_locks[locking.LEVEL_INSTANCE] = 1
+    # Get all locks except nodes in shared mode; they are not used for anything
+    # but read-only access
+    self.share_locks = _ShareAll()
+    self.share_locks[locking.LEVEL_NODE] = 0
+    self.share_locks[locking.LEVEL_NODE_RES] = 0
+    self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
 
     if self.lock_instances:
       self.needed_locks[locking.LEVEL_INSTANCE] = \
@@ -7350,6 +7394,7 @@ class LUInstanceRecreateDisks(LogicalUnit):
   def ExpandNames(self):
     self._ExpandAndLockInstance()
     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+
     if self.op.nodes:
       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
@@ -7358,6 +7403,8 @@ class LUInstanceRecreateDisks(LogicalUnit):
       if self.op.iallocator:
         # iallocator will select a new node in the same group
         self.needed_locks[locking.LEVEL_NODEGROUP] = []
+        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+
     self.needed_locks[locking.LEVEL_NODE_RES] = []
 
   def DeclareLocks(self, level):
@@ -7387,6 +7434,8 @@ class LUInstanceRecreateDisks(LogicalUnit):
         for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
           self.needed_locks[locking.LEVEL_NODE].extend(
             self.cfg.GetNodeGroup(group_uuid).members)
+
+        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
       elif not self.op.nodes:
         self._LockInstancesNodes(primary_only=False)
     elif level == locking.LEVEL_NODE_RES:
@@ -7477,6 +7526,9 @@ class LUInstanceRecreateDisks(LogicalUnit):
       # Release unneeded node and node resource locks
       _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
       _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
+      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
+
+    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
 
   def Exec(self, feedback_fn):
     """Recreate the disks.
@@ -7620,6 +7672,7 @@ class LUInstanceRename(LogicalUnit):
     # Change the instance lock. This is definitely safe while we hold the BGL.
     # Otherwise the new lock would have to be added in acquired mode.
     assert self.REQ_BGL
+    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
 
@@ -7797,6 +7850,10 @@ def _ExpandNamesForMigration(lu):
   lu.needed_locks[locking.LEVEL_NODE_RES] = []
   lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
 
+  # The node allocation lock is actually only needed for replicated instances
+  # (e.g. DRBD8) and if an iallocator is used.
+  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
+
 
 def _DeclareLocksForMigration(lu, level):
   """Declares locks for L{TLMigrateInstance}.
@@ -7805,17 +7862,26 @@ def _DeclareLocksForMigration(lu, level):
   @param level: Lock level
 
   """
-  if level == locking.LEVEL_NODE:
+  if level == locking.LEVEL_NODE_ALLOC:
+    assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
+
     instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
+
     if instance.disk_template in constants.DTS_EXT_MIRROR:
       if lu.op.target_node is None:
         lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
       else:
         lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                lu.op.target_node]
       del lu.recalculate_locks[locking.LEVEL_NODE]
     else:
       lu._LockInstancesNodes() # pylint: disable=W0212
+
+  elif level == locking.LEVEL_NODE:
+    # Node locks are declared together with the node allocation lock
+    assert lu.needed_locks[locking.LEVEL_NODE]
+
   elif level == locking.LEVEL_NODE_RES:
     # Copy node locks
     lu.needed_locks[locking.LEVEL_NODE_RES] = \
@@ -8289,6 +8355,8 @@ class TLMigrateInstance(Tasklet):
                                  errors.ECODE_STATE)
 
     if instance.disk_template in constants.DTS_EXT_MIRROR:
+      assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
+
       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
 
       if self.lu.op.iallocator:
@@ -8320,8 +8388,11 @@ class TLMigrateInstance(Tasklet):
         # in the LU
         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                       keep=[instance.primary_node, self.target_node])
+        _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
 
     else:
+      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
+
       secondary_nodes = instance.secondary_nodes
       if not secondary_nodes:
         raise errors.ConfigurationError("No secondary node but using"
@@ -8423,6 +8494,8 @@ class TLMigrateInstance(Tasklet):
     """Run the allocator based on input opcode.
 
     """
+    assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
+
     # FIXME: add a self.ignore_ipolicy option
     req = iallocator.IAReqRelocate(name=self.instance_name,
                                    relocate_from=[self.instance.primary_node])
@@ -9467,13 +9540,15 @@ def _CheckOSParams(lu, required, nodenames, osname, osparams):
                  osname, node)
 
 
-def _CreateInstanceAllocRequest(op, disks, nics, beparams):
+def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
   """Wrapper around IAReqInstanceAlloc.
 
   @param op: The instance opcode
   @param disks: The computed disks
   @param nics: The computed nics
   @param beparams: The full filled beparams
+  @param node_whitelist: List of nodes which should appear as online to the
+    allocator (unless the node is already marked offline)
 
   @returns: A filled L{iallocator.IAReqInstanceAlloc}
 
@@ -9488,17 +9563,18 @@ def _CreateInstanceAllocRequest(op, disks, nics, beparams):
                                        spindle_use=spindle_use,
                                        disks=disks,
                                        nics=[n.ToDict() for n in nics],
-                                       hypervisor=op.hypervisor)
+                                       hypervisor=op.hypervisor,
+                                       node_whitelist=node_whitelist)
 
 
-def _ComputeNics(op, cluster, default_ip, cfg, proc):
+def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
   """Computes the nics.
 
   @param op: The instance opcode
   @param cluster: Cluster configuration object
   @param default_ip: The default ip to assign
   @param cfg: An instance of the configuration object
-  @param proc: The executer instance
+  @param ec_id: Execution context ID
 
   @returns: The build up nics
 
@@ -9558,7 +9634,7 @@ def _ComputeNics(op, cluster, default_ip, cfg, proc):
 
       try:
         # TODO: We need to factor this out
-        cfg.ReserveMAC(mac, proc.GetECId())
+        cfg.ReserveMAC(mac, ec_id)
       except errors.ReservationError:
         raise errors.OpPrereqError("MAC address %s already in use"
                                    " in cluster" % mac,
@@ -9821,7 +9897,11 @@ class LUInstanceCreate(LogicalUnit):
       # specifying a group on instance creation and then selecting nodes from
       # that group
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
-      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
+      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+
+      if self.op.opportunistic_locking:
+        self.opportunistic_locks[locking.LEVEL_NODE] = True
+        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
     else:
       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
       nodelist = [self.op.pnode]
@@ -9829,9 +9909,6 @@ class LUInstanceCreate(LogicalUnit):
         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
         nodelist.append(self.op.snode)
       self.needed_locks[locking.LEVEL_NODE] = nodelist
-      # Lock resources of instance's primary and secondary nodes (copy to
-      # prevent accidential modification)
-      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
 
     # in case of import lock the source node too
     if self.op.mode == constants.INSTANCE_IMPORT:
@@ -9843,6 +9920,7 @@ class LUInstanceCreate(LogicalUnit):
 
       if src_node is None:
         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
         self.op.src_node = None
         if os.path.isabs(src_path):
           raise errors.OpPrereqError("Importing an instance from a path"
@@ -9856,23 +9934,40 @@ class LUInstanceCreate(LogicalUnit):
           self.op.src_path = src_path = \
             utils.PathJoin(pathutils.EXPORT_DIR, src_path)
 
+    self.needed_locks[locking.LEVEL_NODE_RES] = \
+      _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
+
   def _RunAllocator(self):
     """Run the allocator based on input opcode.
 
     """
+    if self.op.opportunistic_locking:
+      # Only consider nodes for which a lock is held
+      node_whitelist = self.owned_locks(locking.LEVEL_NODE)
+    else:
+      node_whitelist = None
+
     #TODO Export network to iallocator so that it chooses a pnode
     #     in a nodegroup that has the desired network connected to
     req = _CreateInstanceAllocRequest(self.op, self.disks,
-                                      self.nics, self.be_full)
+                                      self.nics, self.be_full,
+                                      node_whitelist)
     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
 
     ial.Run(self.op.iallocator)
 
     if not ial.success:
+      # When opportunistic locks are used only a temporary failure is generated
+      if self.op.opportunistic_locking:
+        ecode = errors.ECODE_TEMP_NORES
+      else:
+        ecode = errors.ECODE_NORES
+
       raise errors.OpPrereqError("Can't compute nodes using"
                                  " iallocator '%s': %s" %
                                  (self.op.iallocator, ial.info),
-                                 errors.ECODE_NORES)
+                                 ecode)
+
     self.op.pnode = ial.result[0]
     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                  self.op.instance_name, self.op.iallocator,
@@ -10173,7 +10268,7 @@ class LUInstanceCreate(LogicalUnit):
 
     # NIC buildup
     self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
-                             self.proc)
+                             self.proc.GetECId())
 
     # disk checks/pre-build
     default_vg = self.cfg.GetVGName()
@@ -10226,12 +10321,14 @@ class LUInstanceCreate(LogicalUnit):
       self._RunAllocator()
 
     # Release all unneeded node locks
-    _ReleaseLocks(self, locking.LEVEL_NODE,
-                  keep=filter(None, [self.op.pnode, self.op.snode,
-                                     self.op.src_node]))
-    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
-                  keep=filter(None, [self.op.pnode, self.op.snode,
-                                     self.op.src_node]))
+    keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
+    _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
+    _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
+    _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
+
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES)), \
+      "Node locks differ from node resource locks"
 
     #### node related checks
 
@@ -10456,6 +10553,7 @@ class LUInstanceCreate(LogicalUnit):
     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                 self.owned_locks(locking.LEVEL_NODE)), \
       "Node locks differ from node resource locks"
+    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
 
     ht_kind = self.op.hypervisor
     if ht_kind in constants.HTS_REQ_PORT:
@@ -10734,11 +10832,19 @@ class LUInstanceMultiAlloc(NoHooksLU):
 
     """
     self.share_locks = _ShareAll()
-    self.needed_locks = {}
+    self.needed_locks = {
+      # iallocator will select nodes and even if no iallocator is used,
+      # collisions with LUInstanceCreate should be avoided
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+      }
 
     if self.op.iallocator:
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
+
+      if self.op.opportunistic_locking:
+        self.opportunistic_locks[locking.LEVEL_NODE] = True
+        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
     else:
       nodeslist = []
       for inst in self.op.instances:
@@ -10759,11 +10865,21 @@ class LUInstanceMultiAlloc(NoHooksLU):
     """
     cluster = self.cfg.GetClusterInfo()
     default_vg = self.cfg.GetVGName()
+    ec_id = self.proc.GetECId()
+
+    if self.op.opportunistic_locking:
+      # Only consider nodes for which a lock is held
+      node_whitelist = self.owned_locks(locking.LEVEL_NODE)
+    else:
+      node_whitelist = None
+
     insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                          _ComputeNics(op, cluster, None,
-                                                      self.cfg, self.proc),
-                                         _ComputeFullBeParams(op, cluster))
+                                                      self.cfg, ec_id),
+                                         _ComputeFullBeParams(op, cluster),
+                                         node_whitelist)
              for op in self.op.instances]
+
     req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
 
@@ -10952,6 +11068,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
       if self.op.iallocator is not None:
         # iallocator will select a new node in the same group
         self.needed_locks[locking.LEVEL_NODEGROUP] = []
+        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
 
     self.needed_locks[locking.LEVEL_NODE_RES] = []
 
@@ -10978,6 +11095,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
       if self.op.iallocator is not None:
         assert self.op.remote_node is None
         assert not self.needed_locks[locking.LEVEL_NODE]
+        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
 
         # Lock member nodes of all locked groups
         self.needed_locks[locking.LEVEL_NODE] = \
@@ -10985,7 +11103,10 @@ class LUInstanceReplaceDisks(LogicalUnit):
              for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
              for node_name in self.cfg.GetNodeGroup(group_uuid).members]
       else:
+        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
+
         self._LockInstancesNodes()
+
     elif level == locking.LEVEL_NODE_RES:
       # Reuse node locks
       self.needed_locks[locking.LEVEL_NODE_RES] = \
@@ -11257,10 +11378,10 @@ class TLReplaceDisks(Tasklet):
     # Release unneeded node and node resource locks
     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
 
     # Release any owned node group
-    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
-      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
 
     # Check whether disks are valid
     for disk_idx in self.disks:
@@ -11284,6 +11405,7 @@ class TLReplaceDisks(Tasklet):
            (owned_nodes, self.node_secondary_ip.keys()))
       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
               self.lu.owned_locks(locking.LEVEL_NODE_RES))
+      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
 
       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
       assert list(owned_instances) == [self.instance_name], \
@@ -13281,7 +13403,7 @@ class LUInstanceSetParams(LogicalUnit):
                                  errors.ECODE_STATE)
     disk_sizes = [disk.size for disk in instance.disks]
     disk_sizes.extend(params["size"] for (op, idx, params, private) in
-                      self.diskmod)
+                      self.diskmod if op == constants.DDM_ADD)
     ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
     ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
 
@@ -13674,9 +13796,11 @@ class LUInstanceChangeGroup(LogicalUnit):
 
   def ExpandNames(self):
     self.share_locks = _ShareAll()
+
     self.needed_locks = {
       locking.LEVEL_NODEGROUP: [],
       locking.LEVEL_NODE: [],
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
       }
 
     self._ExpandAndLockInstance()
@@ -13859,6 +13983,9 @@ class _ExportQuery(_QueryBase):
         locking.LEVEL_NODE: self.wanted,
         }
 
+      if not self.names:
+        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+
   def DeclareLocks(self, lu, level):
     pass
 
@@ -13976,6 +14103,11 @@ class LUBackupExport(LogicalUnit):
       #  - removing the removal operation altogether
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
 
+      # Allocations should be stopped while this LU runs with node locks, but
+      # it doesn't have to be exclusive
+      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
+      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+
   def DeclareLocks(self, level):
     """Last minute lock declaration."""
     # All nodes are locked anyway, so nothing to do here.
@@ -14242,11 +14374,19 @@ class LUBackupRemove(NoHooksLU):
   REQ_BGL = False
 
   def ExpandNames(self):
-    self.needed_locks = {}
-    # We need all nodes to be locked in order for RemoveExport to work, but we
-    # don't need to lock the instance itself, as nothing will happen to it (and
-    # we can remove exports also for a removed instance)
-    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+    self.needed_locks = {
+      # We need all nodes to be locked in order for RemoveExport to work, but
+      # we don't need to lock the instance itself, as nothing will happen to it
+      # (and we can remove exports also for a removed instance)
+      locking.LEVEL_NODE: locking.ALL_SET,
+
+      # Removing backups is quick, so blocking allocations is justified
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+      }
+
+    # Allocations should be stopped while this LU runs with node locks, but it
+    # doesn't have to be exclusive
+    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
 
   def Exec(self, feedback_fn):
     """Remove any export.
@@ -15526,7 +15666,7 @@ class LUTestAllocator(NoHooksLU):
                                  self.op.mode, errors.ECODE_INVAL)
 
     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
-      if self.op.allocator is None:
+      if self.op.iallocator is None:
         raise errors.OpPrereqError("Missing allocator name",
                                    errors.ECODE_INVAL)
     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
@@ -15579,12 +15719,11 @@ class LUTestAllocator(NoHooksLU):
     if self.op.direction == constants.IALLOCATOR_DIR_IN:
       result = ial.in_text
     else:
-      ial.Run(self.op.allocator, validate=False)
+      ial.Run(self.op.iallocator, validate=False)
       result = ial.out_text
     return result
 
 
-# Network LUs
 class LUNetworkAdd(LogicalUnit):
   """Logical unit for creating networks.
 
@@ -15600,18 +15739,25 @@ class LUNetworkAdd(LogicalUnit):
     mn = self.cfg.GetMasterNode()
     return ([mn], [mn])
 
+  def CheckArguments(self):
+    if self.op.mac_prefix:
+      self.op.mac_prefix = \
+        utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
+
   def ExpandNames(self):
     self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
-    self.needed_locks = {}
-    self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
 
-  def CheckPrereq(self):
-    """Check prerequisites.
+    if self.op.conflicts_check:
+      self.share_locks[locking.LEVEL_NODE] = 1
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+        }
+    else:
+      self.needed_locks = {}
 
-    This checks that the given group name is not an existing node group
-    already.
+    self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
 
-    """
+  def CheckPrereq(self):
     if self.op.network is None:
       raise errors.OpPrereqError("Network must be given",
                                  errors.ECODE_INVAL)
@@ -15622,9 +15768,6 @@ class LUNetworkAdd(LogicalUnit):
       raise errors.OpPrereqError("Network '%s' already defined" %
                                  self.op.network, errors.ECODE_EXISTS)
 
-    if self.op.mac_prefix:
-      utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
-
     # Check tag validity
     for tag in self.op.tags:
       objects.TaggableObject.ValidateTag(tag)
@@ -15657,7 +15800,7 @@ class LUNetworkAdd(LogicalUnit):
                            mac_prefix=self.op.mac_prefix,
                            network_type=self.op.network_type,
                            uuid=self.network_uuid,
-                           family=4)
+                           family=constants.IP4_VERSION)
     # Initialize the associated address pool
     try:
       pool = network.AddressPool.InitializeNetwork(nobj)
@@ -15667,21 +15810,26 @@ class LUNetworkAdd(LogicalUnit):
     # Check if we need to reserve the nodes and the cluster master IP
     # These may not be allocated to any instances in routed mode, as
     # they wouldn't function anyway.
-    for node in self.cfg.GetAllNodesInfo().values():
-      for ip in [node.primary_ip, node.secondary_ip]:
-        try:
-          pool.Reserve(ip)
-          self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
-
-        except errors.AddressPoolError:
-          pass
+    if self.op.conflicts_check:
+      for node in self.cfg.GetAllNodesInfo().values():
+        for ip in [node.primary_ip, node.secondary_ip]:
+          try:
+            if pool.Contains(ip):
+              pool.Reserve(ip)
+              self.LogInfo("Reserved IP address of node '%s' (%s)",
+                           node.name, ip)
+          except errors.AddressPoolError:
+            self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
+                            node.name, ip)
 
-    master_ip = self.cfg.GetClusterInfo().master_ip
-    try:
-      pool.Reserve(master_ip)
-      self.LogInfo("Reserved cluster master IP (%s)", master_ip)
-    except errors.AddressPoolError:
-      pass
+      master_ip = self.cfg.GetClusterInfo().master_ip
+      try:
+        if pool.Contains(master_ip):
+          pool.Reserve(master_ip)
+          self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
+      except errors.AddressPoolError:
+        self.LogWarning("Cannot reserve cluster master IP address (%s)",
+                        master_ip)
 
     if self.op.add_reserved_ips:
       for ip in self.op.add_reserved_ips:
@@ -15707,10 +15855,14 @@ class LUNetworkRemove(LogicalUnit):
     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
 
     if not self.network_uuid:
-      raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
+      raise errors.OpPrereqError(("Network '%s' not found" %
+                                  self.op.network_name),
                                  errors.ECODE_INVAL)
+
+    self.share_locks[locking.LEVEL_NODEGROUP] = 1
     self.needed_locks = {
       locking.LEVEL_NETWORK: [self.network_uuid],
+      locking.LEVEL_NODEGROUP: locking.ALL_SET,
       }
 
   def CheckPrereq(self):
@@ -15721,19 +15873,17 @@ class LUNetworkRemove(LogicalUnit):
     cluster.
 
     """
-
     # Verify that the network is not conncted.
     node_groups = [group.name
                    for group in self.cfg.GetAllNodeGroupsInfo().values()
-                   for net in group.networks.keys()
-                   if net == self.network_uuid]
+                   if self.network_uuid in group.networks]
 
     if node_groups:
-      self.LogWarning("Nework '%s' is connected to the following"
-                      " node groups: %s" % (self.op.network_name,
-                      utils.CommaJoin(utils.NiceSort(node_groups))))
-      raise errors.OpPrereqError("Network still connected",
-                                 errors.ECODE_STATE)
+      self.LogWarning("Network '%s' is connected to the following"
+                      " node groups: %s" %
+                      (self.op.network_name,
+                       utils.CommaJoin(utils.NiceSort(node_groups))))
+      raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -15777,11 +15927,11 @@ class LUNetworkSetParams(LogicalUnit):
 
   def ExpandNames(self):
     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
-    self.network = self.cfg.GetNetwork(self.network_uuid)
-    if self.network is None:
-      raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
-                                 (self.op.network_name, self.network_uuid),
+    if self.network_uuid is None:
+      raise errors.OpPrereqError(("Network '%s' not found" %
+                                  self.op.network_name),
                                  errors.ECODE_INVAL)
+
     self.needed_locks = {
       locking.LEVEL_NETWORK: [self.network_uuid],
       }
@@ -15790,6 +15940,7 @@ class LUNetworkSetParams(LogicalUnit):
     """Check prerequisites.
 
     """
+    self.network = self.cfg.GetNetwork(self.network_uuid)
     self.gateway = self.network.gateway
     self.network_type = self.network.network_type
     self.mac_prefix = self.network.mac_prefix
@@ -15818,8 +15969,8 @@ class LUNetworkSetParams(LogicalUnit):
       if self.op.mac_prefix == constants.VALUE_NONE:
         self.mac_prefix = None
       else:
-        utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
-        self.mac_prefix = self.op.mac_prefix
+        self.mac_prefix = \
+          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
 
     if self.op.gateway6:
       if self.op.gateway6 == constants.VALUE_NONE:
@@ -16041,14 +16192,12 @@ class LUNetworkConnect(LogicalUnit):
     self.network_link = self.op.network_link
 
     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
-    self.network = self.cfg.GetNetwork(self.network_uuid)
-    if self.network is None:
+    if self.network_uuid is None:
       raise errors.OpPrereqError("Network %s does not exist" %
                                  self.network_name, errors.ECODE_INVAL)
 
     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
-    self.group = self.cfg.GetNodeGroup(self.group_uuid)
-    if self.group is None:
+    if self.group_uuid is None:
       raise errors.OpPrereqError("Group %s does not exist" %
                                  self.group_name, errors.ECODE_INVAL)
 
@@ -16058,14 +16207,19 @@ class LUNetworkConnect(LogicalUnit):
       }
     self.share_locks[locking.LEVEL_INSTANCE] = 1
 
+    if self.op.conflicts_check:
+      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
+      self.share_locks[locking.LEVEL_NETWORK] = 1
+
   def DeclareLocks(self, level):
     if level == locking.LEVEL_INSTANCE:
       assert not self.needed_locks[locking.LEVEL_INSTANCE]
 
       # Lock instances optimistically, needs verification once group lock has
       # been acquired
-      self.needed_locks[locking.LEVEL_INSTANCE] = \
-          self.cfg.GetNodeGroupInstances(self.group_uuid)
+      if self.op.conflicts_check:
+        self.needed_locks[locking.LEVEL_INSTANCE] = \
+            self.cfg.GetNodeGroupInstances(self.group_uuid)
 
   def BuildHooksEnv(self):
     ret = {
@@ -16073,7 +16227,6 @@ class LUNetworkConnect(LogicalUnit):
       "GROUP_NETWORK_MODE": self.network_mode,
       "GROUP_NETWORK_LINK": self.network_link,
       }
-    ret.update(_BuildNetworkHookEnvByObject(self.network))
     return ret
 
   def BuildHooksNodes(self):
@@ -16081,6 +16234,10 @@ class LUNetworkConnect(LogicalUnit):
     return (nodes, nodes)
 
   def CheckPrereq(self):
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+
+    assert self.group_uuid in owned_groups
+
     l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
                                       for i in value)
 
@@ -16090,6 +16247,7 @@ class LUNetworkConnect(LogicalUnit):
       }
     objects.NIC.CheckParameterSyntax(self.netparams)
 
+    self.group = self.cfg.GetNodeGroup(self.group_uuid)
     #if self.network_mode == constants.NIC_MODE_BRIDGED:
     #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
     self.connected = False
@@ -16099,20 +16257,25 @@ class LUNetworkConnect(LogicalUnit):
       self.connected = True
       return
 
-    pool = network.AddressPool(self.network)
     if self.op.conflicts_check:
-      groupinstances = []
-      for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
-        groupinstances.append(self.cfg.GetInstanceInfo(n))
-      instances = [(instance.name, idx, nic.ip)
-                   for instance in groupinstances
-                   for idx, nic in enumerate(instance.nics)
-                   if (not nic.network and pool.Contains(nic.ip))]
-      if instances:
+      # Re-check the instance locks acquired optimistically in DeclareLocks
+      owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
+      nobj = self.cfg.GetNetwork(self.network_uuid)
+      pool = network.AddressPool(nobj)
+      conflicting_instances = []
+
+      for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
+        for idx, nic in enumerate(instance.nics):
+          if pool.Contains(nic.ip):
+            conflicting_instances.append((instance.name, idx, nic.ip))
+
+      if conflicting_instances:
         self.LogWarning("Following occurences use IPs from network %s"
                         " that is about to connect to nodegroup %s: %s" %
                         (self.network_name, self.group.name,
-                        l(instances)))
+                        l(conflicting_instances)))
         raise errors.OpPrereqError("Conflicting IPs found."
                                    " Please remove/modify"
                                    " corresponding NICs",
@@ -16139,14 +16302,12 @@ class LUNetworkDisconnect(LogicalUnit):
     self.group_name = self.op.group_name
 
     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
-    self.network = self.cfg.GetNetwork(self.network_uuid)
-    if self.network is None:
+    if self.network_uuid is None:
       raise errors.OpPrereqError("Network %s does not exist" %
                                  self.network_name, errors.ECODE_INVAL)
 
     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
-    self.group = self.cfg.GetNodeGroup(self.group_uuid)
-    if self.group is None:
+    if self.group_uuid is None:
       raise errors.OpPrereqError("Group %s does not exist" %
                                  self.group_name, errors.ECODE_INVAL)
 
@@ -16162,14 +16323,14 @@ class LUNetworkDisconnect(LogicalUnit):
 
       # Lock instances optimistically, needs verification once group lock has
       # been acquired
-      self.needed_locks[locking.LEVEL_INSTANCE] = \
+      if self.op.conflicts_check:
+        self.needed_locks[locking.LEVEL_INSTANCE] = \
           self.cfg.GetNodeGroupInstances(self.group_uuid)
 
   def BuildHooksEnv(self):
     ret = {
       "GROUP_NAME": self.group_name,
       }
-    ret.update(_BuildNetworkHookEnvByObject(self.network))
     return ret
 
   def BuildHooksNodes(self):
@@ -16177,9 +16338,14 @@ class LUNetworkDisconnect(LogicalUnit):
     return (nodes, nodes)
 
   def CheckPrereq(self):
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+
+    assert self.group_uuid in owned_groups
+
     l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
                                       for i in value)
 
+    self.group = self.cfg.GetNodeGroup(self.group_uuid)
     self.connected = True
     if self.network_uuid not in self.group.networks:
       self.LogWarning("Network '%s' is not mapped to group '%s'",
@@ -16188,19 +16354,23 @@ class LUNetworkDisconnect(LogicalUnit):
       return
 
     if self.op.conflicts_check:
-      groupinstances = []
-      for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
-        groupinstances.append(self.cfg.GetInstanceInfo(n))
-      instances = [(instance.name, idx, nic.ip)
-                   for instance in groupinstances
-                   for idx, nic in enumerate(instance.nics)
-                   if nic.network == self.network_name]
-      if instances:
+      # Re-check the instance locks acquired optimistically in DeclareLocks
+      owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
+      conflicting_instances = []
+
+      for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
+        for idx, nic in enumerate(instance.nics):
+          if nic.network == self.network_name:
+            conflicting_instances.append((instance.name, idx, nic.ip))
+
+      if conflicting_instances:
         self.LogWarning("Following occurences use IPs from network %s"
                            " that is about to disconnected from the nodegroup"
                            " %s: %s" %
                            (self.network_name, self.group.name,
-                            l(instances)))
+                            l(conflicting_instances)))
         raise errors.OpPrereqError("Conflicting IPs."
                                    " Please remove/modify"
                                    " corresponding NICS",