LUBackupRemove: Use node allocation lock
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index cfb708b..7b6d571 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -2233,6 +2233,11 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       locking.LEVEL_INSTANCE: inst_names,
       locking.LEVEL_NODEGROUP: [self.group_uuid],
       locking.LEVEL_NODE: [],
+
+      # This opcode is run by the watcher every five minutes and acquires all
+      # node locks in a group. It doesn't run for a long time, so it's better
+      # to acquire the node allocation lock as well.
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
       }
 
     self.share_locks = _ShareAll()
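
For reference, needed_locks maps a locking level to the lock names to acquire
(or locking.ALL_SET), while share_locks maps a level to 1 for shared or 0 for
exclusive acquisition. A minimal sketch of what the _ShareAll helper used above
is assumed to do (the real helper is defined elsewhere in cmdlib.py and may
differ slightly):

    def _ShareAll():
      """Return a share_locks dict declaring every lock level as shared.

      Sketch only; assumes locking.LEVELS enumerates all known lock levels.

      """
      return dict.fromkeys(locking.LEVELS, 1)

Individual opcodes then override single levels, for example LUBackupRemove
further down sets self.share_locks[locking.LEVEL_NODE_ALLOC] = 1 explicitly.
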
@@ -3680,6 +3685,12 @@ class LUGroupVerifyDisks(NoHooksLU):
       locking.LEVEL_INSTANCE: [],
       locking.LEVEL_NODEGROUP: [],
       locking.LEVEL_NODE: [],
+
+      # This opcode acquires all node locks in a group. LUClusterVerifyDisks
+      # starts one instance of this opcode for every group, which means all
+      # nodes will be locked for a short amount of time, so it's better to
+      # acquire the node allocation lock as well.
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
       }
 
   def DeclareLocks(self, level):
@@ -3786,6 +3797,8 @@ class LUClusterRepairDiskSizes(NoHooksLU):
   def ExpandNames(self):
     if self.op.instances:
       self.wanted_names = _GetWantedInstances(self, self.op.instances)
+      # Not getting the node allocation lock as only a specific set of
+      # instances (and their nodes) is going to be acquired
       self.needed_locks = {
         locking.LEVEL_NODE_RES: [],
         locking.LEVEL_INSTANCE: self.wanted_names,
@@ -3796,10 +3809,15 @@ class LUClusterRepairDiskSizes(NoHooksLU):
       self.needed_locks = {
         locking.LEVEL_NODE_RES: locking.ALL_SET,
         locking.LEVEL_INSTANCE: locking.ALL_SET,
+
+        # This opcode acquires the node locks for all instances
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
         }
+
     self.share_locks = {
       locking.LEVEL_NODE_RES: 1,
       locking.LEVEL_INSTANCE: 0,
+      locking.LEVEL_NODE_ALLOC: 1,
       }
 
   def DeclareLocks(self, level):
@@ -4596,8 +4614,9 @@ class LUClusterRedistConf(NoHooksLU):
   def ExpandNames(self):
     self.needed_locks = {
       locking.LEVEL_NODE: locking.ALL_SET,
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
     }
-    self.share_locks[locking.LEVEL_NODE] = 1
+    self.share_locks = _ShareAll()
 
   def Exec(self, feedback_fn):
     """Redistribute the configuration.
@@ -4795,6 +4814,11 @@ class LUOobCommand(NoHooksLU):
       locking.LEVEL_NODE: lock_names,
       }
 
+    if not self.op.node_names:
+      # Acquire node allocation lock only if all nodes are affected
+      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
+
   def CheckPrereq(self):
     """Check prerequisites.
 
@@ -5322,13 +5346,16 @@ class LUNodeQueryvols(NoHooksLU):
 
   def ExpandNames(self):
     self.share_locks = _ShareAll()
-    self.needed_locks = {}
 
-    if not self.op.nodes:
-      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+    if self.op.nodes:
+      self.needed_locks = {
+        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
+        }
     else:
-      self.needed_locks[locking.LEVEL_NODE] = \
-        _GetWantedNodes(self, self.op.nodes)
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+        }
 
   def Exec(self, feedback_fn):
     """Computes the list of nodes and their attributes.
@@ -6026,19 +6053,28 @@ class LUNodeSetParams(LogicalUnit):
 
   def ExpandNames(self):
     if self.lock_all:
-      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+
+        # Block allocations when all nodes are locked
+        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+        }
     else:
-      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
+      self.needed_locks = {
+        locking.LEVEL_NODE: self.op.node_name,
+        }
 
     # Since modifying a node can have severe effects on currently running
     # operations the resource lock is at least acquired in shared mode
     self.needed_locks[locking.LEVEL_NODE_RES] = \
       self.needed_locks[locking.LEVEL_NODE]
 
-    # Get node resource and instance locks in shared mode; they are not used
-    # for anything but read-only access
-    self.share_locks[locking.LEVEL_NODE_RES] = 1
-    self.share_locks[locking.LEVEL_INSTANCE] = 1
+    # Acquire all locks in shared mode except the node-related ones; the
+    # shared locks are only used for read-only access
+    self.share_locks = _ShareAll()
+    self.share_locks[locking.LEVEL_NODE] = 0
+    self.share_locks[locking.LEVEL_NODE_RES] = 0
+    self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
 
     if self.lock_instances:
       self.needed_locks[locking.LEVEL_INSTANCE] = \
@@ -7358,6 +7394,7 @@ class LUInstanceRecreateDisks(LogicalUnit):
   def ExpandNames(self):
     self._ExpandAndLockInstance()
     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+
     if self.op.nodes:
       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
@@ -7366,6 +7403,8 @@ class LUInstanceRecreateDisks(LogicalUnit):
       if self.op.iallocator:
         # iallocator will select a new node in the same group
         self.needed_locks[locking.LEVEL_NODEGROUP] = []
+        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+
     self.needed_locks[locking.LEVEL_NODE_RES] = []
 
   def DeclareLocks(self, level):
@@ -7395,6 +7434,8 @@ class LUInstanceRecreateDisks(LogicalUnit):
         for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
           self.needed_locks[locking.LEVEL_NODE].extend(
             self.cfg.GetNodeGroup(group_uuid).members)
+
+        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
       elif not self.op.nodes:
         self._LockInstancesNodes(primary_only=False)
     elif level == locking.LEVEL_NODE_RES:
@@ -7485,6 +7526,9 @@ class LUInstanceRecreateDisks(LogicalUnit):
       # Release unneeded node and node resource locks
       _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
       _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
+      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
+
+    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
 
   def Exec(self, feedback_fn):
     """Recreate the disks.
@@ -10767,7 +10811,11 @@ class LUInstanceMultiAlloc(NoHooksLU):
 
     """
     self.share_locks = _ShareAll()
-    self.needed_locks = {}
+    self.needed_locks = {
+      # The iallocator will select nodes; even if no iallocator is used,
+      # collisions with LUInstanceCreate should be avoided
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+      }
 
     if self.op.iallocator:
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
@@ -10988,6 +11036,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
       if self.op.iallocator is not None:
         # iallocator will select a new node in the same group
         self.needed_locks[locking.LEVEL_NODEGROUP] = []
+        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
 
     self.needed_locks[locking.LEVEL_NODE_RES] = []
 
@@ -11014,6 +11063,7 @@ class LUInstanceReplaceDisks(LogicalUnit):
       if self.op.iallocator is not None:
         assert self.op.remote_node is None
         assert not self.needed_locks[locking.LEVEL_NODE]
+        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
 
         # Lock member nodes of all locked groups
         self.needed_locks[locking.LEVEL_NODE] = \
@@ -11021,7 +11071,10 @@ class LUInstanceReplaceDisks(LogicalUnit):
              for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
              for node_name in self.cfg.GetNodeGroup(group_uuid).members]
       else:
+        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
+
         self._LockInstancesNodes()
+
     elif level == locking.LEVEL_NODE_RES:
       # Reuse node locks
       self.needed_locks[locking.LEVEL_NODE_RES] = \
@@ -11293,10 +11346,10 @@ class TLReplaceDisks(Tasklet):
     # Release unneeded node and node resource locks
     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
 
     # Release any owned node group
-    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
-      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
+    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
 
     # Check whether disks are valid
     for disk_idx in self.disks:
@@ -11320,6 +11373,7 @@ class TLReplaceDisks(Tasklet):
            (owned_nodes, self.node_secondary_ip.keys()))
       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
               self.lu.owned_locks(locking.LEVEL_NODE_RES))
+      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
 
       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
       assert list(owned_instances) == [self.instance_name], \
@@ -13710,9 +13764,11 @@ class LUInstanceChangeGroup(LogicalUnit):
 
   def ExpandNames(self):
     self.share_locks = _ShareAll()
+
     self.needed_locks = {
       locking.LEVEL_NODEGROUP: [],
       locking.LEVEL_NODE: [],
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
       }
 
     self._ExpandAndLockInstance()
@@ -14278,11 +14334,19 @@ class LUBackupRemove(NoHooksLU):
   REQ_BGL = False
 
   def ExpandNames(self):
-    self.needed_locks = {}
-    # We need all nodes to be locked in order for RemoveExport to work, but we
-    # don't need to lock the instance itself, as nothing will happen to it (and
-    # we can remove exports also for a removed instance)
-    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+    self.needed_locks = {
+      # We need all nodes to be locked in order for RemoveExport to work, but
+      # we don't need to lock the instance itself, as nothing will happen to it
+      # (and we can remove exports even for an already-removed instance)
+      locking.LEVEL_NODE: locking.ALL_SET,
+
+      # Removing backups is quick, so blocking allocations is justified
+      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+      }
+
+    # Allocations should be stopped while this LU holds node locks, but the
+    # allocation lock itself doesn't have to be held exclusively
+    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
 
   def Exec(self, feedback_fn):
     """Remove any export.
@@ -15635,6 +15699,11 @@ class LUNetworkAdd(LogicalUnit):
     mn = self.cfg.GetMasterNode()
     return ([mn], [mn])
 
+  def CheckArguments(self):
+    if self.op.mac_prefix:
+      self.op.mac_prefix = \
+        utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
+
   def ExpandNames(self):
     self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
 
@@ -15649,12 +15718,6 @@ class LUNetworkAdd(LogicalUnit):
     self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
 
   def CheckPrereq(self):
-    """Check prerequisites.
-
-    This checks that the given group name is not an existing node group
-    already.
-
-    """
     if self.op.network is None:
       raise errors.OpPrereqError("Network must be given",
                                  errors.ECODE_INVAL)
@@ -15665,9 +15728,6 @@ class LUNetworkAdd(LogicalUnit):
       raise errors.OpPrereqError("Network '%s' already defined" %
                                  self.op.network, errors.ECODE_EXISTS)
 
-    if self.op.mac_prefix:
-      utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
-
     # Check tag validity
     for tag in self.op.tags:
       objects.TaggableObject.ValidateTag(tag)
@@ -15755,7 +15815,8 @@ class LUNetworkRemove(LogicalUnit):
     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
 
     if not self.network_uuid:
-      raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
+      raise errors.OpPrereqError(("Network '%s' not found" %
+                                  self.op.network_name),
                                  errors.ECODE_INVAL)
 
     self.share_locks[locking.LEVEL_NODEGROUP] = 1
@@ -15772,19 +15833,17 @@ class LUNetworkRemove(LogicalUnit):
     cluster.
 
     """
-
     # Verify that the network is not conncted.
     node_groups = [group.name
                    for group in self.cfg.GetAllNodeGroupsInfo().values()
-                   for net in group.networks.keys()
-                   if net == self.network_uuid]
+                   if self.network_uuid in group.networks]
 
     if node_groups:
-      self.LogWarning("Nework '%s' is connected to the following"
-                      " node groups: %s" % (self.op.network_name,
-                      utils.CommaJoin(utils.NiceSort(node_groups))))
-      raise errors.OpPrereqError("Network still connected",
-                                 errors.ECODE_STATE)
+      self.LogWarning("Network '%s' is connected to the following"
+                      " node groups: %s" %
+                      (self.op.network_name,
+                       utils.CommaJoin(utils.NiceSort(node_groups))))
+      raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -15828,11 +15887,11 @@ class LUNetworkSetParams(LogicalUnit):
 
   def ExpandNames(self):
     self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
-    self.network = self.cfg.GetNetwork(self.network_uuid)
-    if self.network is None:
-      raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
-                                 (self.op.network_name, self.network_uuid),
+    if self.network_uuid is None:
+      raise errors.OpPrereqError(("Network '%s' not found" %
+                                  self.op.network_name),
                                  errors.ECODE_INVAL)
+
     self.needed_locks = {
       locking.LEVEL_NETWORK: [self.network_uuid],
       }
@@ -15841,6 +15900,7 @@ class LUNetworkSetParams(LogicalUnit):
     """Check prerequisites.
 
     """
+    self.network = self.cfg.GetNetwork(self.network_uuid)
     self.gateway = self.network.gateway
     self.network_type = self.network.network_type
     self.mac_prefix = self.network.mac_prefix
@@ -15869,8 +15929,8 @@ class LUNetworkSetParams(LogicalUnit):
       if self.op.mac_prefix == constants.VALUE_NONE:
         self.mac_prefix = None
       else:
-        utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
-        self.mac_prefix = self.op.mac_prefix
+        self.mac_prefix = \
+          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
 
     if self.op.gateway6:
       if self.op.gateway6 == constants.VALUE_NONE:
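
The removed lines validated the prefix by padding it to a full MAC address;
the new utils helper presumably wraps the same trick and additionally returns
the normalized prefix. A sketch under that assumption (the actual
implementation lives in utils and may differ):

    def NormalizeAndValidateThreeOctetMacPrefix(mac_prefix):
      """Sketch: validate a three-octet MAC prefix such as "aa:bb:cc".

      Pads the prefix to a full MAC and reuses NormalizeAndValidateMac, which
      is assumed to raise on invalid input and return the MAC lower-cased.

      """
      full = NormalizeAndValidateMac(mac_prefix + ":00:00:00")
      # The normalized prefix is the first eight characters ("aa:bb:cc")
      return full[:8]
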
@@ -16092,22 +16152,24 @@ class LUNetworkConnect(LogicalUnit):
     self.network_link = self.op.network_link
 
     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
-    self.network = self.cfg.GetNetwork(self.network_uuid)
-    if self.network is None:
+    if self.network_uuid is None:
       raise errors.OpPrereqError("Network %s does not exist" %
                                  self.network_name, errors.ECODE_INVAL)
 
     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
-    self.group = self.cfg.GetNodeGroup(self.group_uuid)
-    if self.group is None:
+    if self.group_uuid is None:
       raise errors.OpPrereqError("Group %s does not exist" %
                                  self.group_name, errors.ECODE_INVAL)
 
-    self.share_locks[locking.LEVEL_INSTANCE] = 1
     self.needed_locks = {
       locking.LEVEL_INSTANCE: [],
       locking.LEVEL_NODEGROUP: [self.group_uuid],
       }
+    self.share_locks[locking.LEVEL_INSTANCE] = 1
+
+    if self.op.conflicts_check:
+      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
+      self.share_locks[locking.LEVEL_NETWORK] = 1
 
   def DeclareLocks(self, level):
     if level == locking.LEVEL_INSTANCE:
@@ -16118,7 +16180,6 @@ class LUNetworkConnect(LogicalUnit):
       if self.op.conflicts_check:
         self.needed_locks[locking.LEVEL_INSTANCE] = \
             self.cfg.GetNodeGroupInstances(self.group_uuid)
-        self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
 
   def BuildHooksEnv(self):
     ret = {
@@ -16126,7 +16187,6 @@ class LUNetworkConnect(LogicalUnit):
       "GROUP_NETWORK_MODE": self.network_mode,
       "GROUP_NETWORK_LINK": self.network_link,
       }
-    ret.update(_BuildNetworkHookEnvByObject(self.network))
     return ret
 
   def BuildHooksNodes(self):
@@ -16134,14 +16194,10 @@ class LUNetworkConnect(LogicalUnit):
     return (nodes, nodes)
 
   def CheckPrereq(self):
-    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
 
     assert self.group_uuid in owned_groups
 
-    # Check if locked instances are still correct
-    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
-
     l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
                                       for i in value)
 
@@ -16151,6 +16207,7 @@ class LUNetworkConnect(LogicalUnit):
       }
     objects.NIC.CheckParameterSyntax(self.netparams)
 
+    self.group = self.cfg.GetNodeGroup(self.group_uuid)
     #if self.network_mode == constants.NIC_MODE_BRIDGED:
     #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
     self.connected = False
@@ -16161,7 +16218,12 @@ class LUNetworkConnect(LogicalUnit):
       return
 
     if self.op.conflicts_check:
-      pool = network.AddressPool(self.network)
+      # Check if locked instances are still correct
+      owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
+      nobj = self.cfg.GetNetwork(self.network_uuid)
+      pool = network.AddressPool(nobj)
       conflicting_instances = []
 
       for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
@@ -16200,18 +16262,17 @@ class LUNetworkDisconnect(LogicalUnit):
     self.group_name = self.op.group_name
 
     self.network_uuid = self.cfg.LookupNetwork(self.network_name)
-    self.network = self.cfg.GetNetwork(self.network_uuid)
-    if self.network is None:
+    if self.network_uuid is None:
       raise errors.OpPrereqError("Network %s does not exist" %
                                  self.network_name, errors.ECODE_INVAL)
 
     self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
-    self.group = self.cfg.GetNodeGroup(self.group_uuid)
-    if self.group is None:
+    if self.group_uuid is None:
       raise errors.OpPrereqError("Group %s does not exist" %
                                  self.group_name, errors.ECODE_INVAL)
 
     self.needed_locks = {
+      locking.LEVEL_INSTANCE: [],
       locking.LEVEL_NODEGROUP: [self.group_uuid],
       }
     self.share_locks[locking.LEVEL_INSTANCE] = 1
@@ -16230,7 +16291,6 @@ class LUNetworkDisconnect(LogicalUnit):
     ret = {
       "GROUP_NAME": self.group_name,
       }
-    ret.update(_BuildNetworkHookEnvByObject(self.network))
     return ret
 
   def BuildHooksNodes(self):
@@ -16238,17 +16298,14 @@ class LUNetworkDisconnect(LogicalUnit):
     return (nodes, nodes)
 
   def CheckPrereq(self):
-    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
 
     assert self.group_uuid in owned_groups
 
-    # Check if locked instances are still correct
-    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
-
     l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
                                       for i in value)
 
+    self.group = self.cfg.GetNodeGroup(self.group_uuid)
     self.connected = True
     if self.network_uuid not in self.group.networks:
       self.LogWarning("Network '%s' is not mapped to group '%s'",
@@ -16257,6 +16314,10 @@ class LUNetworkDisconnect(LogicalUnit):
       return
 
     if self.op.conflicts_check:
+      # Check if locked instances are still correct
+      owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
+
       conflicting_instances = []
 
       for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):