LUInstanceReplaceDisks: Acquire node allocation lock
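
The change threads the node allocation lock (locking.LEVEL_NODE_ALLOC) through
the logical units that may still pick nodes via an iallocator: the lock is
declared while any node could still be chosen and dropped again as soon as the
target nodes are known. Below is a minimal sketch of that recurring pattern,
not code from the patch; _ExampleExpandNames, _ExampleCheckPrereq and
chosen_nodes are made-up names, while needed_locks, owned_locks(),
locking.NAL, locking.ALL_SET and _ReleaseLocks() are the cmdlib.py/locking
facilities visible in the hunks.

from ganeti import locking

def _ExampleExpandNames(lu):
  # While an iallocator may still choose any node, lock all nodes and take
  # the node allocation lock to serialize allocations across jobs.
  lu.needed_locks = {
    locking.LEVEL_NODE: locking.ALL_SET,
    locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
    }

def _ExampleCheckPrereq(lu, chosen_nodes):
  # The allocation lock must be held while the allocator runs; once the
  # target nodes are known, release it together with the node locks that are
  # no longer needed, so other jobs can allocate nodes again.
  assert locking.NAL in lu.owned_locks(locking.LEVEL_NODE_ALLOC)
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=chosen_nodes)
  _ReleaseLocks(lu, locking.LEVEL_NODE_ALLOC)
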
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index b627325..253a703 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -138,13 +138,18 @@ class LogicalUnit(object):
     self.owned_locks = context.glm.list_owned
     self.context = context
     self.rpc = rpc_runner
-    # Dicts used to declare locking needs to mcpu
+
+    # Dictionaries used to declare locking needs to mcpu
     self.needed_locks = None
     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
+    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
+
     self.add_locks = {}
     self.remove_locks = {}
+
     # Used to force good behavior when calling helper functions
     self.recalculate_locks = {}
+
     # logging
     self.Log = processor.Log # pylint: disable=C0103
     self.LogWarning = processor.LogWarning # pylint: disable=C0103
@@ -4036,16 +4041,15 @@ class LUClusterSetParams(LogicalUnit):
   def ExpandNames(self):
     # FIXME: in the future maybe other cluster params won't require checking on
     # all nodes to be modified.
+    # FIXME: This opcode changes cluster-wide settings. Is acquiring all
+    # resource locks the right thing, or should it be the BGL instead?
     self.needed_locks = {
       locking.LEVEL_NODE: locking.ALL_SET,
       locking.LEVEL_INSTANCE: locking.ALL_SET,
       locking.LEVEL_NODEGROUP: locking.ALL_SET,
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
     }
-    self.share_locks = {
-        locking.LEVEL_NODE: 1,
-        locking.LEVEL_INSTANCE: 1,
-        locking.LEVEL_NODEGROUP: 1,
-    }
+    self.share_locks = _ShareAll()
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -5223,6 +5227,7 @@ class _NodeQuery(_QueryBase):
     if self.do_locking:
       # If any non-static field is requested we need to lock the nodes
       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
+      lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
 
   def DeclareLocks(self, lu, level):
     pass
@@ -7801,6 +7806,10 @@ def _ExpandNamesForMigration(lu):
   lu.needed_locks[locking.LEVEL_NODE_RES] = []
   lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
 
+  # The node allocation lock is actually only needed for externally mirrored
+  # instances (constants.DTS_EXT_MIRROR) and if an iallocator is used.
+  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
+
 
 def _DeclareLocksForMigration(lu, level):
   """Declares locks for L{TLMigrateInstance}.
@@ -7809,17 +7818,26 @@ def _DeclareLocksForMigration(lu, level):
   @param level: Lock level
 
   """
-  if level == locking.LEVEL_NODE:
+  if level == locking.LEVEL_NODE_ALLOC:
+    assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
+
     instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
+
     if instance.disk_template in constants.DTS_EXT_MIRROR:
       if lu.op.target_node is None:
         lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
       else:
         lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                lu.op.target_node]
       del lu.recalculate_locks[locking.LEVEL_NODE]
     else:
       lu._LockInstancesNodes() # pylint: disable=W0212
+
+  elif level == locking.LEVEL_NODE:
+    # Node locks are declared together with the node allocation lock
+    assert (lu.needed_locks[locking.LEVEL_NODE] or
+            lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
+
   elif level == locking.LEVEL_NODE_RES:
     # Copy node locks
     lu.needed_locks[locking.LEVEL_NODE_RES] = \
@@ -8293,6 +8311,8 @@ class TLMigrateInstance(Tasklet):
                                  errors.ECODE_STATE)
 
     if instance.disk_template in constants.DTS_EXT_MIRROR:
+      assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
+
       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
 
       if self.lu.op.iallocator:
@@ -8324,8 +8344,11 @@ class TLMigrateInstance(Tasklet):
         # in the LU
         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                       keep=[instance.primary_node, self.target_node])
+        _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
 
     else:
+      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
+
       secondary_nodes = instance.secondary_nodes
       if not secondary_nodes:
         raise errors.ConfigurationError("No secondary node but using"
@@ -8427,6 +8450,8 @@ class TLMigrateInstance(Tasklet):
     """Run the allocator based on input opcode.
 
     """
+    assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
+
     # FIXME: add a self.ignore_ipolicy option
     req = iallocator.IAReqRelocate(name=self.instance_name,
                                    relocate_from=[self.instance.primary_node])
@@ -9825,7 +9850,7 @@ class LUInstanceCreate(LogicalUnit):
       # specifying a group on instance creation and then selecting nodes from
       # that group
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
-      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
+      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
     else:
       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
       nodelist = [self.op.pnode]
@@ -9833,9 +9858,6 @@ class LUInstanceCreate(LogicalUnit):
         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
         nodelist.append(self.op.snode)
       self.needed_locks[locking.LEVEL_NODE] = nodelist
-      # Lock resources of instance's primary and secondary nodes (copy to
-      # prevent accidential modification)
-      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
 
     # in case of import lock the source node too
     if self.op.mode == constants.INSTANCE_IMPORT:
@@ -9847,6 +9869,7 @@ class LUInstanceCreate(LogicalUnit):
 
       if src_node is None:
         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
         self.op.src_node = None
         if os.path.isabs(src_path):
           raise errors.OpPrereqError("Importing an instance from a path"
@@ -9860,6 +9883,9 @@ class LUInstanceCreate(LogicalUnit):
           self.op.src_path = src_path = \
             utils.PathJoin(pathutils.EXPORT_DIR, src_path)
 
+    self.needed_locks[locking.LEVEL_NODE_RES] = \
+      _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
+
   def _RunAllocator(self):
     """Run the allocator based on input opcode.
 
@@ -10230,12 +10256,14 @@ class LUInstanceCreate(LogicalUnit):
       self._RunAllocator()
 
     # Release all unneeded node locks
-    _ReleaseLocks(self, locking.LEVEL_NODE,
-                  keep=filter(None, [self.op.pnode, self.op.snode,
-                                     self.op.src_node]))
-    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
-                  keep=filter(None, [self.op.pnode, self.op.snode,
-                                     self.op.src_node]))
+    keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
+    _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
+    _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
+    _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
+
+    assert (self.owned_locks(locking.LEVEL_NODE) ==
+            self.owned_locks(locking.LEVEL_NODE_RES)), \
+      "Node locks differ from node resource locks"
 
     #### node related checks
 
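
The keep_locks refactoring above uses filter(None, ...) to drop the optional
nodes that were not given (no secondary node, no import source node); under
Python 2, which this cmdlib.py targets, filter() returns a plain list that can
be passed as keep= to both release calls. A small standalone illustration with
made-up values:

# Made-up values: an instance with no secondary node and not being imported.
pnode, snode, src_node = "node1.example.com", None, None
keep_locks = filter(None, [pnode, snode, src_node])
assert keep_locks == ["node1.example.com"]
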
@@ -10460,6 +10488,7 @@ class LUInstanceCreate(LogicalUnit):
     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                 self.owned_locks(locking.LEVEL_NODE)), \
       "Node locks differ from node resource locks"
+    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
 
     ht_kind = self.op.hypervisor
     if ht_kind in constants.HTS_REQ_PORT:
@@ -10959,6 +10988,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
       if self.op.iallocator is not None:
         # iallocator will select a new node in the same group
         self.needed_locks[locking.LEVEL_NODEGROUP] = []
+        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
 
     self.needed_locks[locking.LEVEL_NODE_RES] = []
 
@@ -10985,6 +11015,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
       if self.op.iallocator is not None:
         assert self.op.remote_node is None
         assert not self.needed_locks[locking.LEVEL_NODE]
+        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
 
         # Lock member nodes of all locked groups
         self.needed_locks[locking.LEVEL_NODE] = \
@@ -10992,7 +11023,10 @@ class LUInstanceReplaceDisks(LogicalUnit):
              for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
              for node_name in self.cfg.GetNodeGroup(group_uuid).members]
       else:
+        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
+
         self._LockInstancesNodes()
+
     elif level == locking.LEVEL_NODE_RES:
       # Reuse node locks
       self.needed_locks[locking.LEVEL_NODE_RES] = \
@@ -11264,10 +11298,10 @@ class TLReplaceDisks(Tasklet):
     # Release unneeded node and node resource locks
     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
 
     # Release any owned node group
-    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
-      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
 
     # Check whether disks are valid
     for disk_idx in self.disks:
@@ -11291,6 +11325,7 @@ class TLReplaceDisks(Tasklet):
            (owned_nodes, self.node_secondary_ip.keys()))
       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
               self.lu.owned_locks(locking.LEVEL_NODE_RES))
+      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
 
       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
       assert list(owned_instances) == [self.instance_name], \
@@ -13681,9 +13716,11 @@ class LUInstanceChangeGroup(LogicalUnit):
 
   def ExpandNames(self):
     self.share_locks = _ShareAll()
+
     self.needed_locks = {
       locking.LEVEL_NODEGROUP: [],
       locking.LEVEL_NODE: [],
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
       }
 
     self._ExpandAndLockInstance()
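
LUInstanceChangeGroup, like LUClusterSetParams earlier in the diff, combines
the new LEVEL_NODE_ALLOC entry with share_locks = _ShareAll(), so the node
allocation lock is acquired in shared mode here. The body of _ShareAll() is
not part of this diff; a sketch of what such a helper amounts to, assuming it
simply marks every level in locking.LEVELS as shared:

def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)
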
@@ -16105,6 +16142,14 @@ class LUNetworkConnect(LogicalUnit):
     return (nodes, nodes)
 
   def CheckPrereq(self):
+    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+
+    assert self.group_uuid in owned_groups
+
+    # Check if locked instances are still correct
+    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
     l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
                                       for i in value)
 
@@ -16123,20 +16168,20 @@ class LUNetworkConnect(LogicalUnit):
       self.connected = True
       return
 
-    pool = network.AddressPool(self.network)
     if self.op.conflicts_check:
-      groupinstances = []
-      for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
-        groupinstances.append(self.cfg.GetInstanceInfo(n))
-      instances = [(instance.name, idx, nic.ip)
-                   for instance in groupinstances
-                   for idx, nic in enumerate(instance.nics)
-                   if (not nic.network and pool.Contains(nic.ip))]
-      if instances:
+      pool = network.AddressPool(self.network)
+      conflicting_instances = []
+
+      for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
+        for idx, nic in enumerate(instance.nics):
+          if pool.Contains(nic.ip):
+            conflicting_instances.append((instance.name, idx, nic.ip))
+
+      if conflicting_instances:
         self.LogWarning("Following occurences use IPs from network %s"
                         " that is about to connect to nodegroup %s: %s" %
                         (self.network_name, self.group.name,
-                        l(instances)))
+                        l(conflicting_instances)))
         raise errors.OpPrereqError("Conflicting IPs found."
                                    " Please remove/modify"
                                    " corresponding NICs",
@@ -16187,7 +16232,7 @@ class LUNetworkDisconnect(LogicalUnit):
       # been acquired
       if self.op.conflicts_check:
         self.needed_locks[locking.LEVEL_INSTANCE] = \
-            self.cfg.GetNodeGroupInstances(self.group_uuid)
+          self.cfg.GetNodeGroupInstances(self.group_uuid)
 
   def BuildHooksEnv(self):
     ret = {
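
The LUNetworkConnect hunk above and the LUNetworkDisconnect hunks below give
both opcodes the same conflict-check shape: instead of re-querying the
configuration for the group's instances, they iterate over the instances whose
locks are already owned (validated against the node group by
_CheckNodeGroupInstances), using cfg.GetMultiInstanceInfo, which yields
(name, instance) pairs. A sketch of that shared shape; _FindConflictingNics is
a hypothetical name, not a helper from cmdlib.py:

def _FindConflictingNics(cfg, owned_instances, nic_matches):
  """Returns (instance name, NIC index, NIC IP) for every matching NIC."""
  conflicts = []
  for (_, instance) in cfg.GetMultiInstanceInfo(owned_instances):
    for (idx, nic) in enumerate(instance.nics):
      if nic_matches(nic):
        conflicts.append((instance.name, idx, nic.ip))
  return conflicts

# LUNetworkConnect would pass "lambda nic: pool.Contains(nic.ip)", while
# LUNetworkDisconnect would pass "lambda nic: nic.network == network_name".
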
@@ -16201,6 +16246,14 @@ class LUNetworkDisconnect(LogicalUnit):
     return (nodes, nodes)
 
   def CheckPrereq(self):
+    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+
+    assert self.group_uuid in owned_groups
+
+    # Check if locked instances are still correct
+    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
     l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
                                       for i in value)
 
@@ -16212,19 +16265,19 @@ class LUNetworkDisconnect(LogicalUnit):
       return
 
     if self.op.conflicts_check:
-      groupinstances = []
-      for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
-        groupinstances.append(self.cfg.GetInstanceInfo(n))
-      instances = [(instance.name, idx, nic.ip)
-                   for instance in groupinstances
-                   for idx, nic in enumerate(instance.nics)
-                   if nic.network == self.network_name]
-      if instances:
+      conflicting_instances = []
+
+      for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
+        for idx, nic in enumerate(instance.nics):
+          if nic.network == self.network_name:
+            conflicting_instances.append((instance.name, idx, nic.ip))
+
+      if conflicting_instances:
         self.LogWarning("Following occurences use IPs from network %s"
                            " that is about to disconnected from the nodegroup"
                            " %s: %s" %
                            (self.network_name, self.group.name,
-                            l(instances)))
+                            l(conflicting_instances)))
         raise errors.OpPrereqError("Conflicting IPs."
                                    " Please remove/modify"
                                    " corresponding NICS",