Add error code for temporary lack of resources

[ganeti-local] / lib / cmdlib.py
diff --git a/lib/cmdlib.py b/lib/cmdlib.py

index e029450..d29631a 100644 (file)
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -138,13 +138,18 @@ class LogicalUnit(object):
      self.owned_locks = context.glm.list_owned
      self.context = context
      self.rpc = rpc_runner
-    # Dicts used to declare locking needs to mcpu
+
+    # Dictionaries used to declare locking needs to mcpu
      self.needed_locks = None
      self.share_locks = dict.fromkeys(locking.LEVELS, 0)
+    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
+
      self.add_locks = {}
      self.remove_locks = {}
+
      # Used to force good behavior when calling helper functions
      self.recalculate_locks = {}
+
      # logging
      self.Log = processor.Log # pylint: disable=C0103
      self.LogWarning = processor.LogWarning # pylint: disable=C0103
@@ -2228,6 +2233,11 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
        locking.LEVEL_INSTANCE: inst_names,
        locking.LEVEL_NODEGROUP: [self.group_uuid],
        locking.LEVEL_NODE: [],
+
+      # This opcode is run by watcher every five minutes and acquires all nodes
+      # for a group. It doesn't run for a long time, so it's better to acquire
+      # the node allocation lock as well.
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
  
      self.share_locks = _ShareAll()
@@ -3675,6 +3685,12 @@ class LUGroupVerifyDisks(NoHooksLU):
        locking.LEVEL_INSTANCE: [],
        locking.LEVEL_NODEGROUP: [],
        locking.LEVEL_NODE: [],
+
+      # This opcode acquires all node locks in a group. LUClusterVerifyDisks
+      # starts one instance of this opcode for every group, which means all
+      # nodes will be locked for a short amount of time, so it's better to
+      # acquire the node allocation lock as well.
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
  
    def DeclareLocks(self, level):
@@ -3781,6 +3797,8 @@ class LUClusterRepairDiskSizes(NoHooksLU):
    def ExpandNames(self):
      if self.op.instances:
        self.wanted_names = _GetWantedInstances(self, self.op.instances)
+      # Not getting the node allocation lock as only a specific set of
+      # instances (and their nodes) is going to be acquired
        self.needed_locks = {
          locking.LEVEL_NODE_RES: [],
          locking.LEVEL_INSTANCE: self.wanted_names,
@@ -3791,10 +3809,15 @@ class LUClusterRepairDiskSizes(NoHooksLU):
        self.needed_locks = {
          locking.LEVEL_NODE_RES: locking.ALL_SET,
          locking.LEVEL_INSTANCE: locking.ALL_SET,
+
+        # This opcode acquires the node locks for all instances
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
          }
+
      self.share_locks = {
        locking.LEVEL_NODE_RES: 1,
        locking.LEVEL_INSTANCE: 0,
+      locking.LEVEL_NODE_ALLOC: 1,
        }
  
    def DeclareLocks(self, level):
@@ -4036,16 +4059,15 @@ class LUClusterSetParams(LogicalUnit):
    def ExpandNames(self):
      # FIXME: in the future maybe other cluster params won't require checking on
      # all nodes to be modified.
+    # FIXME: This opcode changes cluster-wide settings. Is acquiring all
+    # resource locks the right thing, shouldn't it be the BGL instead?
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        locking.LEVEL_NODEGROUP: locking.ALL_SET,
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
-    self.share_locks = {
-        locking.LEVEL_NODE: 1,
-        locking.LEVEL_INSTANCE: 1,
-        locking.LEVEL_NODEGROUP: 1,
-    }
+    self.share_locks = _ShareAll()
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -4592,8 +4614,9 @@ class LUClusterRedistConf(NoHooksLU):
    def ExpandNames(self):
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
-    self.share_locks[locking.LEVEL_NODE] = 1
+    self.share_locks = _ShareAll()
  
    def Exec(self, feedback_fn):
      """Redistribute the configuration.
@@ -4791,6 +4814,11 @@ class LUOobCommand(NoHooksLU):
        locking.LEVEL_NODE: lock_names,
        }
  
+    if not self.op.node_names:
+      # Acquire node allocation lock only if all nodes are affected
+      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
+
    def CheckPrereq(self):
      """Check prerequisites.
  
@@ -5223,6 +5251,7 @@ class _NodeQuery(_QueryBase):
      if self.do_locking:
        # If any non-static field is requested we need to lock the nodes
        lu.needed_locks[locking.LEVEL_NODE] = self.wanted
+      lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
  
    def DeclareLocks(self, lu, level):
      pass
@@ -5317,13 +5346,16 @@ class LUNodeQueryvols(NoHooksLU):
  
    def ExpandNames(self):
      self.share_locks = _ShareAll()
-    self.needed_locks = {}
  
-    if not self.op.nodes:
-      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+    if self.op.nodes:
+      self.needed_locks = {
+        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
+        }
      else:
-      self.needed_locks[locking.LEVEL_NODE] = \
-        _GetWantedNodes(self, self.op.nodes)
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+        }
  
    def Exec(self, feedback_fn):
      """Computes the list of nodes and their attributes.
@@ -5386,13 +5418,16 @@ class LUNodeQueryStorage(NoHooksLU):
  
    def ExpandNames(self):
      self.share_locks = _ShareAll()
-    self.needed_locks = {}
  
      if self.op.nodes:
-      self.needed_locks[locking.LEVEL_NODE] = \
-        _GetWantedNodes(self, self.op.nodes)
+      self.needed_locks = {
+        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
+        }
      else:
-      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+        }
  
    def Exec(self, feedback_fn):
      """Computes the list of nodes and their attributes.
@@ -6018,19 +6053,28 @@ class LUNodeSetParams(LogicalUnit):
  
    def ExpandNames(self):
      if self.lock_all:
-      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+
+        # Block allocations when all nodes are locked
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+        }
      else:
-      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
+      self.needed_locks = {
+        locking.LEVEL_NODE: self.op.node_name,
+        }
  
      # Since modifying a node can have severe effects on currently running
      # operations the resource lock is at least acquired in shared mode
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]
  
-    # Get node resource and instance locks in shared mode; they are not used
-    # for anything but read-only access
-    self.share_locks[locking.LEVEL_NODE_RES] = 1
-    self.share_locks[locking.LEVEL_INSTANCE] = 1
+    # Get all locks except nodes in shared mode; they are not used for anything
+    # but read-only access
+    self.share_locks = _ShareAll()
+    self.share_locks[locking.LEVEL_NODE] = 0
+    self.share_locks[locking.LEVEL_NODE_RES] = 0
+    self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
  
      if self.lock_instances:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
@@ -7350,6 +7394,7 @@ class LUInstanceRecreateDisks(LogicalUnit):
    def ExpandNames(self):
      self._ExpandAndLockInstance()
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+
      if self.op.nodes:
        self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
        self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
@@ -7358,6 +7403,8 @@ class LUInstanceRecreateDisks(LogicalUnit):
        if self.op.iallocator:
          # iallocator will select a new node in the same group
          self.needed_locks[locking.LEVEL_NODEGROUP] = []
+        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+
      self.needed_locks[locking.LEVEL_NODE_RES] = []
  
    def DeclareLocks(self, level):
@@ -7387,6 +7434,8 @@ class LUInstanceRecreateDisks(LogicalUnit):
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
            self.needed_locks[locking.LEVEL_NODE].extend(
              self.cfg.GetNodeGroup(group_uuid).members)
+
+        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
        elif not self.op.nodes:
          self._LockInstancesNodes(primary_only=False)
      elif level == locking.LEVEL_NODE_RES:
@@ -7477,6 +7526,9 @@ class LUInstanceRecreateDisks(LogicalUnit):
        # Release unneeded node and node resource locks
        _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
        _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
+      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
+
+    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
  
    def Exec(self, feedback_fn):
      """Recreate the disks.
@@ -7620,6 +7672,7 @@ class LUInstanceRename(LogicalUnit):
      # Change the instance lock. This is definitely safe while we hold the BGL.
      # Otherwise the new lock would have to be added in acquired mode.
      assert self.REQ_BGL
+    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
      self.glm.remove(locking.LEVEL_INSTANCE, old_name)
      self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
  
@@ -7797,6 +7850,10 @@ def _ExpandNamesForMigration(lu):
    lu.needed_locks[locking.LEVEL_NODE_RES] = []
    lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
  
+  # The node allocation lock is actually only needed for replicated instances
+  # (e.g. DRBD8) and if an iallocator is used.
+  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
+
  
  def _DeclareLocksForMigration(lu, level):
    """Declares locks for L{TLMigrateInstance}.
@@ -7805,17 +7862,26 @@ def _DeclareLocksForMigration(lu, level):
    @param level: Lock level
  
    """
-  if level == locking.LEVEL_NODE:
+  if level == locking.LEVEL_NODE_ALLOC:
+    assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
+
      instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
+
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if lu.op.target_node is None:
          lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
        else:
          lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                 lu.op.target_node]
        del lu.recalculate_locks[locking.LEVEL_NODE]
      else:
        lu._LockInstancesNodes() # pylint: disable=W0212
+
+  elif level == locking.LEVEL_NODE:
+    # Node locks are declared together with the node allocation lock
+    assert lu.needed_locks[locking.LEVEL_NODE]
+
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      lu.needed_locks[locking.LEVEL_NODE_RES] = \
@@ -8289,6 +8355,8 @@ class TLMigrateInstance(Tasklet):
                                   errors.ECODE_STATE)
  
      if instance.disk_template in constants.DTS_EXT_MIRROR:
+      assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
+
        _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
  
        if self.lu.op.iallocator:
@@ -8320,8 +8388,11 @@ class TLMigrateInstance(Tasklet):
          # in the LU
          _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                        keep=[instance.primary_node, self.target_node])
+        _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
  
      else:
+      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
+
        secondary_nodes = instance.secondary_nodes
        if not secondary_nodes:
          raise errors.ConfigurationError("No secondary node but using"
@@ -8423,6 +8494,8 @@ class TLMigrateInstance(Tasklet):
      """Run the allocator based on input opcode.
  
      """
+    assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
+
      # FIXME: add a self.ignore_ipolicy option
      req = iallocator.IAReqRelocate(name=self.instance_name,
                                     relocate_from=[self.instance.primary_node])
@@ -9467,13 +9540,15 @@ def _CheckOSParams(lu, required, nodenames, osname, osparams):
                   osname, node)
  
  
-def _CreateInstanceAllocRequest(op, disks, nics, beparams):
+def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
    """Wrapper around IAReqInstanceAlloc.
  
    @param op: The instance opcode
    @param disks: The computed disks
    @param nics: The computed nics
    @param beparams: The full filled beparams
+  @param node_whitelist: List of nodes which should appear as online to the
+    allocator (unless the node is already marked offline)
  
    @returns: A filled L{iallocator.IAReqInstanceAlloc}
  
@@ -9488,17 +9563,18 @@ def _CreateInstanceAllocRequest(op, disks, nics, beparams):
                                         spindle_use=spindle_use,
                                         disks=disks,
                                         nics=[n.ToDict() for n in nics],
-                                       hypervisor=op.hypervisor)
+                                       hypervisor=op.hypervisor,
+                                       node_whitelist=node_whitelist)
  
  
-def _ComputeNics(op, cluster, default_ip, cfg, proc):
+def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
    """Computes the nics.
  
    @param op: The instance opcode
    @param cluster: Cluster configuration object
    @param default_ip: The default ip to assign
    @param cfg: An instance of the configuration object
-  @param proc: The executer instance
+  @param ec_id: Execution context ID
  
    @returns: The build up nics
  
@@ -9558,7 +9634,7 @@ def _ComputeNics(op, cluster, default_ip, cfg, proc):
  
        try:
          # TODO: We need to factor this out
-        cfg.ReserveMAC(mac, proc.GetECId())
+        cfg.ReserveMAC(mac, ec_id)
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
@@ -9821,7 +9897,11 @@ class LUInstanceCreate(LogicalUnit):
        # specifying a group on instance creation and then selecting nodes from
        # that group
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
-      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
+      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+
+      if self.op.opportunistic_locking:
+        self.opportunistic_locks[locking.LEVEL_NODE] = True
+        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
      else:
        self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
        nodelist = [self.op.pnode]
@@ -9829,9 +9909,6 @@ class LUInstanceCreate(LogicalUnit):
          self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
          nodelist.append(self.op.snode)
        self.needed_locks[locking.LEVEL_NODE] = nodelist
-      # Lock resources of instance's primary and secondary nodes (copy to
-      # prevent accidential modification)
-      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
  
      # in case of import lock the source node too
      if self.op.mode == constants.INSTANCE_IMPORT:
@@ -9843,6 +9920,7 @@ class LUInstanceCreate(LogicalUnit):
  
        if src_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
          self.op.src_node = None
          if os.path.isabs(src_path):
            raise errors.OpPrereqError("Importing an instance from a path"
@@ -9856,23 +9934,40 @@ class LUInstanceCreate(LogicalUnit):
            self.op.src_path = src_path = \
              utils.PathJoin(pathutils.EXPORT_DIR, src_path)
  
+    self.needed_locks[locking.LEVEL_NODE_RES] = \
+      _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
+
    def _RunAllocator(self):
      """Run the allocator based on input opcode.
  
      """
+    if self.op.opportunistic_locking:
+      # Only consider nodes for which a lock is held
+      node_whitelist = self.owned_locks(locking.LEVEL_NODE)
+    else:
+      node_whitelist = None
+
      #TODO Export network to iallocator so that it chooses a pnode
      #     in a nodegroup that has the desired network connected to
      req = _CreateInstanceAllocRequest(self.op, self.disks,
-                                      self.nics, self.be_full)
+                                      self.nics, self.be_full,
+                                      node_whitelist)
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)
  
      ial.Run(self.op.iallocator)
  
      if not ial.success:
+      # When opportunistic locks are used only a temporary failure is generated
+      if self.op.opportunistic_locking:
+        ecode = errors.ECODE_TEMP_NORES
+      else:
+        ecode = errors.ECODE_NORES
+
        raise errors.OpPrereqError("Can't compute nodes using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
-                                 errors.ECODE_NORES)
+                                 ecode)
+
      self.op.pnode = ial.result[0]
      self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                   self.op.instance_name, self.op.iallocator,
@@ -10173,7 +10268,7 @@ class LUInstanceCreate(LogicalUnit):
  
      # NIC buildup
      self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
-                             self.proc)
+                             self.proc.GetECId())
  
      # disk checks/pre-build
      default_vg = self.cfg.GetVGName()
@@ -10226,12 +10321,14 @@ class LUInstanceCreate(LogicalUnit):
        self._RunAllocator()
  
      # Release all unneeded node locks
-    _ReleaseLocks(self, locking.LEVEL_NODE,
-                  keep=filter(None, [self.op.pnode, self.op.snode,
-                                     self.op.src_node]))
-    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
-                  keep=filter(None, [self.op.pnode, self.op.snode,
-                                     self.op.src_node]))
+    keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
+    _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
+    _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
+    _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
+
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES)), \
+      "Node locks differ from node resource locks"
  
      #### node related checks
  
@@ -10456,6 +10553,7 @@ class LUInstanceCreate(LogicalUnit):
      assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                  self.owned_locks(locking.LEVEL_NODE)), \
        "Node locks differ from node resource locks"
+    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
  
      ht_kind = self.op.hypervisor
      if ht_kind in constants.HTS_REQ_PORT:
@@ -10734,11 +10832,19 @@ class LUInstanceMultiAlloc(NoHooksLU):
  
      """
      self.share_locks = _ShareAll()
-    self.needed_locks = {}
+    self.needed_locks = {
+      # iallocator will select nodes and even if no iallocator is used,
+      # collisions with LUInstanceCreate should be avoided
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+      }
  
      if self.op.iallocator:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
+
+      if self.op.opportunistic_locking:
+        self.opportunistic_locks[locking.LEVEL_NODE] = True
+        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
      else:
        nodeslist = []
        for inst in self.op.instances:
@@ -10759,11 +10865,21 @@ class LUInstanceMultiAlloc(NoHooksLU):
      """
      cluster = self.cfg.GetClusterInfo()
      default_vg = self.cfg.GetVGName()
+    ec_id = self.proc.GetECId()
+
+    if self.op.opportunistic_locking:
+      # Only consider nodes for which a lock is held
+      node_whitelist = self.owned_locks(locking.LEVEL_NODE)
+    else:
+      node_whitelist = None
+
      insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                           _ComputeNics(op, cluster, None,
-                                                      self.cfg, self.proc),
-                                         _ComputeFullBeParams(op, cluster))
+                                                      self.cfg, ec_id),
+                                         _ComputeFullBeParams(op, cluster),
+                                         node_whitelist)
               for op in self.op.instances]
+
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)
  
@@ -10952,6 +11068,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
        if self.op.iallocator is not None:
          # iallocator will select a new node in the same group
          self.needed_locks[locking.LEVEL_NODEGROUP] = []
+        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
  
      self.needed_locks[locking.LEVEL_NODE_RES] = []
  
@@ -10978,6 +11095,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
        if self.op.iallocator is not None:
          assert self.op.remote_node is None
          assert not self.needed_locks[locking.LEVEL_NODE]
+        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
  
          # Lock member nodes of all locked groups
          self.needed_locks[locking.LEVEL_NODE] = \
@@ -10985,7 +11103,10 @@ class LUInstanceReplaceDisks(LogicalUnit):
               for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
               for node_name in self.cfg.GetNodeGroup(group_uuid).members]
        else:
+        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
+
          self._LockInstancesNodes()
+
      elif level == locking.LEVEL_NODE_RES:
        # Reuse node locks
        self.needed_locks[locking.LEVEL_NODE_RES] = \
@@ -11257,10 +11378,10 @@ class TLReplaceDisks(Tasklet):
      # Release unneeded node and node resource locks
      _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
  
      # Release any owned node group
-    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
-      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
  
      # Check whether disks are valid
      for disk_idx in self.disks:
@@ -11284,6 +11405,7 @@ class TLReplaceDisks(Tasklet):
             (owned_nodes, self.node_secondary_ip.keys()))
        assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
                self.lu.owned_locks(locking.LEVEL_NODE_RES))
+      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
  
        owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
        assert list(owned_instances) == [self.instance_name], \
@@ -13281,7 +13403,7 @@ class LUInstanceSetParams(LogicalUnit):
                                   errors.ECODE_STATE)
      disk_sizes = [disk.size for disk in instance.disks]
      disk_sizes.extend(params["size"] for (op, idx, params, private) in
-                      self.diskmod)
+                      self.diskmod if op == constants.DDM_ADD)
      ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
      ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
  
@@ -13674,9 +13796,11 @@ class LUInstanceChangeGroup(LogicalUnit):
  
    def ExpandNames(self):
      self.share_locks = _ShareAll()
+
      self.needed_locks = {
        locking.LEVEL_NODEGROUP: [],
        locking.LEVEL_NODE: [],
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
  
      self._ExpandAndLockInstance()
@@ -13859,6 +13983,9 @@ class _ExportQuery(_QueryBase):
          locking.LEVEL_NODE: self.wanted,
          }
  
+      if not self.names:
+        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+
    def DeclareLocks(self, lu, level):
      pass
  
@@ -13976,6 +14103,11 @@ class LUBackupExport(LogicalUnit):
        #  - removing the removal operation altogether
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  
+      # Allocations should be stopped while this LU runs with node locks, but
+      # it doesn't have to be exclusive
+      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
+      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+
    def DeclareLocks(self, level):
      """Last minute lock declaration."""
      # All nodes are locked anyway, so nothing to do here.
@@ -14242,11 +14374,19 @@ class LUBackupRemove(NoHooksLU):
    REQ_BGL = False
  
    def ExpandNames(self):
-    self.needed_locks = {}
-    # We need all nodes to be locked in order for RemoveExport to work, but we
-    # don't need to lock the instance itself, as nothing will happen to it (and
-    # we can remove exports also for a removed instance)
-    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+    self.needed_locks = {
+      # We need all nodes to be locked in order for RemoveExport to work, but
+      # we don't need to lock the instance itself, as nothing will happen to it
+      # (and we can remove exports also for a removed instance)
+      locking.LEVEL_NODE: locking.ALL_SET,
+
+      # Removing backups is quick, so blocking allocations is justified
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+      }
+
+    # Allocations should be stopped while this LU runs with node locks, but it
+    # doesn't have to be exclusive
+    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
  
    def Exec(self, feedback_fn):
      """Remove any export.
@@ -15526,7 +15666,7 @@ class LUTestAllocator(NoHooksLU):
                                   self.op.mode, errors.ECODE_INVAL)
  
      if self.op.direction == constants.IALLOCATOR_DIR_OUT:
-      if self.op.allocator is None:
+      if self.op.iallocator is None:
          raise errors.OpPrereqError("Missing allocator name",
                                     errors.ECODE_INVAL)
      elif self.op.direction != constants.IALLOCATOR_DIR_IN:
@@ -15579,12 +15719,11 @@ class LUTestAllocator(NoHooksLU):
      if self.op.direction == constants.IALLOCATOR_DIR_IN:
        result = ial.in_text
      else:
-      ial.Run(self.op.allocator, validate=False)
+      ial.Run(self.op.iallocator, validate=False)
        result = ial.out_text
      return result
  
  
-# Network LUs
  class LUNetworkAdd(LogicalUnit):
    """Logical unit for creating networks.
  
@@ -15600,18 +15739,25 @@ class LUNetworkAdd(LogicalUnit):
      mn = self.cfg.GetMasterNode()
      return ([mn], [mn])
  
+  def CheckArguments(self):
+    if self.op.mac_prefix:
+      self.op.mac_prefix = \
+        utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
+
    def ExpandNames(self):
      self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
-    self.needed_locks = {}
-    self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
  
-  def CheckPrereq(self):
-    """Check prerequisites.
+    if self.op.conflicts_check:
+      self.share_locks[locking.LEVEL_NODE] = 1
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+        }
+    else:
+      self.needed_locks = {}
  
-    This checks that the given group name is not an existing node group
-    already.
+    self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
  
-    """
+  def CheckPrereq(self):
      if self.op.network is None:
        raise errors.OpPrereqError("Network must be given",
                                   errors.ECODE_INVAL)
@@ -15622,9 +15768,6 @@ class LUNetworkAdd(LogicalUnit):
        raise errors.OpPrereqError("Network '%s' already defined" %
                                   self.op.network, errors.ECODE_EXISTS)
  
-    if self.op.mac_prefix:
-      utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
-
      # Check tag validity
      for tag in self.op.tags:
        objects.TaggableObject.ValidateTag(tag)
@@ -15657,7 +15800,7 @@ class LUNetworkAdd(LogicalUnit):
                             mac_prefix=self.op.mac_prefix,
                             network_type=self.op.network_type,
                             uuid=self.network_uuid,
-                           family=4)
+                           family=constants.IP4_VERSION)
      # Initialize the associated address pool
      try:
        pool = network.AddressPool.InitializeNetwork(nobj)
@@ -15667,21 +15810,26 @@ class LUNetworkAdd(LogicalUnit):
      # Check if we need to reserve the nodes and the cluster master IP
      # These may not be allocated to any instances in routed mode, as
      # they wouldn't function anyway.
-    for node in self.cfg.GetAllNodesInfo().values():
-      for ip in [node.primary_ip, node.secondary_ip]:
-        try:
-          pool.Reserve(ip)
-          self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
-
-        except errors.AddressPoolError:
-          pass
+    if self.op.conflicts_check:
+      for node in self.cfg.GetAllNodesInfo().values():
+        for ip in [node.primary_ip, node.secondary_ip]:
+          try:
+            if pool.Contains(ip):
+              pool.Reserve(ip)
+              self.LogInfo("Reserved IP address of node '%s' (%s)",
+                           node.name, ip)
+          except errors.AddressPoolError:
+            self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
+                            node.name, ip)
  
-    master_ip = self.cfg.GetClusterInfo().master_ip
-    try:
-      pool.Reserve(master_ip)
-      self.LogInfo("Reserved cluster master IP (%s)", master_ip)
-    except errors.AddressPoolError:
-      pass
+      master_ip = self.cfg.GetClusterInfo().master_ip
+      try:
+        if pool.Contains(master_ip):
+          pool.Reserve(master_ip)
+          self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
+      except errors.AddressPoolError:
+        self.LogWarning("Cannot reserve cluster master IP address (%s)",
+                        master_ip)
  
      if self.op.add_reserved_ips:
        for ip in self.op.add_reserved_ips:
@@ -15707,10 +15855,14 @@ class LUNetworkRemove(LogicalUnit):
      self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
  
      if not self.network_uuid:
-      raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
+      raise errors.OpPrereqError(("Network '%s' not found" %
+                                  self.op.network_name),
                                   errors.ECODE_INVAL)
+
+    self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks = {
        locking.LEVEL_NETWORK: [self.network_uuid],
+      locking.LEVEL_NODEGROUP: locking.ALL_SET,
        }
  
    def CheckPrereq(self):
@@ -15721,19 +15873,17 @@ class LUNetworkRemove(LogicalUnit):
      cluster.
  
      """
-
      # Verify that the network is not conncted.
      node_groups = [group.name
                     for group in self.cfg.GetAllNodeGroupsInfo().values()
-                   for net in group.networks.keys()
-                   if net == self.network_uuid]
+                   if self.network_uuid in group.networks]
  
      if node_groups:
-      self.LogWarning("Nework '%s' is connected to the following"
-                      " node groups: %s" % (self.op.network_name,
-                      utils.CommaJoin(utils.NiceSort(node_groups))))
-      raise errors.OpPrereqError("Network still connected",
-                                 errors.ECODE_STATE)
+      self.LogWarning("Network '%s' is connected to the following"
+                      " node groups: %s" %
+                      (self.op.network_name,
+                       utils.CommaJoin(utils.NiceSort(node_groups))))
+      raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
  
    def BuildHooksEnv(self):
      """Build hooks env.
@@ -15777,11 +15927,11 @@ class LUNetworkSetParams(LogicalUnit):
  
    def ExpandNames(self):
      self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
-    self.network = self.cfg.GetNetwork(self.network_uuid)
-    if self.network is None:
-      raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
-                                 (self.op.network_name, self.network_uuid),
+    if self.network_uuid is None:
+      raise errors.OpPrereqError(("Network '%s' not found" %
+                                  self.op.network_name),
                                   errors.ECODE_INVAL)
+
      self.needed_locks = {
        locking.LEVEL_NETWORK: [self.network_uuid],
        }
@@ -15790,6 +15940,7 @@ class LUNetworkSetParams(LogicalUnit):
      """Check prerequisites.
  
      """
+    self.network = self.cfg.GetNetwork(self.network_uuid)
      self.gateway = self.network.gateway
      self.network_type = self.network.network_type
      self.mac_prefix = self.network.mac_prefix
@@ -15818,8 +15969,8 @@ class LUNetworkSetParams(LogicalUnit):
        if self.op.mac_prefix == constants.VALUE_NONE:
          self.mac_prefix = None
        else:
-        utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
-        self.mac_prefix = self.op.mac_prefix
+        self.mac_prefix = \
+          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
  
      if self.op.gateway6:
        if self.op.gateway6 == constants.VALUE_NONE:
@@ -16041,14 +16192,12 @@ class LUNetworkConnect(LogicalUnit):
      self.network_link = self.op.network_link
  
      self.network_uuid = self.cfg.LookupNetwork(self.network_name)
-    self.network = self.cfg.GetNetwork(self.network_uuid)
-    if self.network is None:
+    if self.network_uuid is None:
        raise errors.OpPrereqError("Network %s does not exist" %
                                   self.network_name, errors.ECODE_INVAL)
  
      self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
-    self.group = self.cfg.GetNodeGroup(self.group_uuid)
-    if self.group is None:
+    if self.group_uuid is None:
        raise errors.OpPrereqError("Group %s does not exist" %
                                   self.group_name, errors.ECODE_INVAL)
  
@@ -16058,14 +16207,19 @@ class LUNetworkConnect(LogicalUnit):
        }
      self.share_locks[locking.LEVEL_INSTANCE] = 1
  
+    if self.op.conflicts_check:
+      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
+      self.share_locks[locking.LEVEL_NETWORK] = 1
+
    def DeclareLocks(self, level):
      if level == locking.LEVEL_INSTANCE:
        assert not self.needed_locks[locking.LEVEL_INSTANCE]
  
        # Lock instances optimistically, needs verification once group lock has
        # been acquired
-      self.needed_locks[locking.LEVEL_INSTANCE] = \
-          self.cfg.GetNodeGroupInstances(self.group_uuid)
+      if self.op.conflicts_check:
+        self.needed_locks[locking.LEVEL_INSTANCE] = \
+            self.cfg.GetNodeGroupInstances(self.group_uuid)
  
    def BuildHooksEnv(self):
      ret = {
@@ -16073,7 +16227,6 @@ class LUNetworkConnect(LogicalUnit):
        "GROUP_NETWORK_MODE": self.network_mode,
        "GROUP_NETWORK_LINK": self.network_link,
        }
-    ret.update(_BuildNetworkHookEnvByObject(self.network))
      return ret
  
    def BuildHooksNodes(self):
@@ -16081,6 +16234,10 @@ class LUNetworkConnect(LogicalUnit):
      return (nodes, nodes)
  
    def CheckPrereq(self):
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+
+    assert self.group_uuid in owned_groups
+
      l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
                                        for i in value)
  
@@ -16090,6 +16247,7 @@ class LUNetworkConnect(LogicalUnit):
        }
      objects.NIC.CheckParameterSyntax(self.netparams)
  
+    self.group = self.cfg.GetNodeGroup(self.group_uuid)
      #if self.network_mode == constants.NIC_MODE_BRIDGED:
      #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
      self.connected = False
@@ -16099,20 +16257,25 @@ class LUNetworkConnect(LogicalUnit):
        self.connected = True
        return
  
-    pool = network.AddressPool(self.network)
      if self.op.conflicts_check:
-      groupinstances = []
-      for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
-        groupinstances.append(self.cfg.GetInstanceInfo(n))
-      instances = [(instance.name, idx, nic.ip)
-                   for instance in groupinstances
-                   for idx, nic in enumerate(instance.nics)
-                   if (not nic.network and pool.Contains(nic.ip))]
-      if instances:
+      # Check if locked instances are still correct
+      owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
+      nobj = self.cfg.GetNetwork(self.network_uuid)
+      pool = network.AddressPool(nobj)
+      conflicting_instances = []
+
+      for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
+        for idx, nic in enumerate(instance.nics):
+          if pool.Contains(nic.ip):
+            conflicting_instances.append((instance.name, idx, nic.ip))
+
+      if conflicting_instances:
          self.LogWarning("Following occurences use IPs from network %s"
                          " that is about to connect to nodegroup %s: %s" %
                          (self.network_name, self.group.name,
-                        l(instances)))
+                        l(conflicting_instances)))
          raise errors.OpPrereqError("Conflicting IPs found."
                                     " Please remove/modify"
                                     " corresponding NICs",
@@ -16139,14 +16302,12 @@ class LUNetworkDisconnect(LogicalUnit):
      self.group_name = self.op.group_name
  
      self.network_uuid = self.cfg.LookupNetwork(self.network_name)
-    self.network = self.cfg.GetNetwork(self.network_uuid)
-    if self.network is None:
+    if self.network_uuid is None:
        raise errors.OpPrereqError("Network %s does not exist" %
                                   self.network_name, errors.ECODE_INVAL)
  
      self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
-    self.group = self.cfg.GetNodeGroup(self.group_uuid)
-    if self.group is None:
+    if self.group_uuid is None:
        raise errors.OpPrereqError("Group %s does not exist" %
                                   self.group_name, errors.ECODE_INVAL)
  
@@ -16162,14 +16323,14 @@ class LUNetworkDisconnect(LogicalUnit):
  
        # Lock instances optimistically, needs verification once group lock has
        # been acquired
-      self.needed_locks[locking.LEVEL_INSTANCE] = \
+      if self.op.conflicts_check:
+        self.needed_locks[locking.LEVEL_INSTANCE] = \
            self.cfg.GetNodeGroupInstances(self.group_uuid)
  
    def BuildHooksEnv(self):
      ret = {
        "GROUP_NAME": self.group_name,
        }
-    ret.update(_BuildNetworkHookEnvByObject(self.network))
      return ret
  
    def BuildHooksNodes(self):
@@ -16177,9 +16338,14 @@ class LUNetworkDisconnect(LogicalUnit):
      return (nodes, nodes)
  
    def CheckPrereq(self):
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+
+    assert self.group_uuid in owned_groups
+
      l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
                                        for i in value)
  
+    self.group = self.cfg.GetNodeGroup(self.group_uuid)
      self.connected = True
      if self.network_uuid not in self.group.networks:
        self.LogWarning("Network '%s' is not mapped to group '%s'",
@@ -16188,19 +16354,23 @@ class LUNetworkDisconnect(LogicalUnit):
        return
  
      if self.op.conflicts_check:
-      groupinstances = []
-      for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
-        groupinstances.append(self.cfg.GetInstanceInfo(n))
-      instances = [(instance.name, idx, nic.ip)
-                   for instance in groupinstances
-                   for idx, nic in enumerate(instance.nics)
-                   if nic.network == self.network_name]
-      if instances:
+      # Check if locked instances are still correct
+      owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
+      conflicting_instances = []
+
+      for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
+        for idx, nic in enumerate(instance.nics):
+          if nic.network == self.network_name:
+            conflicting_instances.append((instance.name, idx, nic.ip))
+
+      if conflicting_instances:
          self.LogWarning("Following occurences use IPs from network %s"
                             " that is about to disconnected from the nodegroup"
                             " %s: %s" %
                             (self.network_name, self.group.name,
-                            l(instances)))
+                            l(conflicting_instances)))
          raise errors.OpPrereqError("Conflicting IPs."
                                     " Please remove/modify"
                                     " corresponding NICS",