hv_kvm: Add TODO regarding monitor commands

[ganeti-local] / lib / mcpu.py
diff --git a/lib/mcpu.py b/lib/mcpu.py

index b227334..594e16e 100644 (file)
--- a/lib/mcpu.py
+++ b/lib/mcpu.py
@@ -48,6 +48,19 @@ from ganeti import pathutils
  _OP_PREFIX = "Op"
  _LU_PREFIX = "LU"
  
+#: LU classes which don't need to acquire the node allocation lock
+#: (L{locking.NAL}) when they acquire all node or node resource locks.
+#: Currently empty; used as the default C{_nal_whitelist} of
+#: L{_VerifyLocks}.
+_NODE_ALLOC_WHITELIST = frozenset([])
+
+#: LU classes which don't need to acquire the node allocation lock
+#: (L{locking.NAL}) in the same mode (shared/exclusive) as the node
+#: or node resource locks. Used as the default C{_mode_whitelist} of
+#: L{_VerifyLocks}, which asserts that the modes of these classes
+#: actually differ.
+_NODE_ALLOC_MODE_WHITELIST = compat.UniqueFrozenset([
+  cmdlib.LUBackupExport,
+  cmdlib.LUBackupRemove,
+  cmdlib.LUOobCommand,
+  ])
+
  
  class LockAcquireTimeout(Exception):
    """Exception to report timeouts on acquiring locks.
@@ -246,6 +259,44 @@ def _RpcResultsToHooksResults(rpc_results):
                for (node, rpc_res) in rpc_results.items())
  
  
+def _VerifyLocks(lu, glm, _mode_whitelist=_NODE_ALLOC_MODE_WHITELIST,
+                 _nal_whitelist=_NODE_ALLOC_WHITELIST):
+  """Performs consistency checks on locks acquired by a logical unit.
+
+  For each of the node and node resource levels listed in the LU's
+  C{needed_locks}, asserts that the node allocation lock (L{locking.NAL})
+  is used consistently with the locks of that level. All checks are plain
+  C{assert} statements; the function returns right away if C{__debug__}
+  is false.
+
+  @type lu: L{cmdlib.LogicalUnit}
+  @param lu: Logical unit instance
+  @type glm: L{locking.GanetiLockManager}
+  @param glm: Lock manager
+  @param _mode_whitelist: LU classes which must declare a different sharing
+    mode for the node allocation lock than for the level being checked;
+    defaults to L{_NODE_ALLOC_MODE_WHITELIST}
+  @param _nal_whitelist: LU classes which must not own the node allocation
+    lock while requesting node or node resource locks; defaults to
+    L{_NODE_ALLOC_WHITELIST}
+  @raise AssertionError: If one of the consistency checks fails
+
+  """
+  # Everything below is assertion-only, so there is nothing to do when
+  # assertions are disabled
+  if not __debug__:
+    return
+
+  # Queried once, before the loop; presumably true if the node allocation
+  # lock is owned (confirm against GanetiLockManager.check_owned)
+  have_nal = glm.check_owned(locking.LEVEL_NODE_ALLOC, locking.NAL)
+
+  for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
+    # TODO: Verify using actual lock mode, not using LU variables
+    if level in lu.needed_locks:
+      # Sharing modes as declared by the LU, not as actually acquired
+      share_node_alloc = lu.share_locks[locking.LEVEL_NODE_ALLOC]
+      share_level = lu.share_locks[level]
+
+      # Check 1: sharing mode of the node allocation lock vs. this level.
+      # Whitelisted classes are required to differ, so a stale whitelist
+      # entry is reported as well.
+      # NOTE(review): this branch compares the raw values while the branch
+      # below compares their truthiness -- confirm share_locks only ever
+      # holds 0/1 or booleans.
+      if lu.__class__ in _mode_whitelist:
+        assert share_node_alloc != share_level, \
+          "LU is whitelisted to use different modes for node allocation lock"
+      else:
+        assert bool(share_node_alloc) == bool(share_level), \
+          ("Node allocation lock must be acquired using the same mode as nodes"
+           " and node resources")
+
+      # Check 2: ownership of the node allocation lock. Whitelisted classes
+      # must not own it; any other class must own it once it requests the
+      # whole level (ALL_SET) or the lock manager reports owning all locks
+      # of the level.
+      if lu.__class__ in _nal_whitelist:
+        assert not have_nal, \
+          "LU is whitelisted for not acquiring the node allocation lock"
+      elif lu.needed_locks[level] == locking.ALL_SET or glm.owning_all(level):
+        assert have_nal, \
+          ("Node allocation lock must be used if an LU acquires all nodes"
+           " or node resources")
+
+
  class Processor(object):
    """Object which runs OpCodes"""
    DISPATCH_TABLE = _ComputeDispatchTable()
@@ -275,7 +326,7 @@ class Processor(object):
      if not self._enable_locks:
        raise errors.ProgrammerError("Attempted to use disabled locks")
  
-  def _AcquireLocks(self, level, names, shared, timeout):
+  def _AcquireLocks(self, level, names, shared, opportunistic, timeout):
      """Acquires locks via the Ganeti lock manager.
  
      @type level: int
@@ -284,6 +335,8 @@ class Processor(object):
      @param names: Lock names
      @type shared: bool
      @param shared: Whether the locks should be acquired in shared mode
+    @type opportunistic: bool
+    @param opportunistic: Whether to acquire opportunistically
      @type timeout: None or float
      @param timeout: Timeout for acquiring the locks
      @raise LockAcquireTimeout: In case locks couldn't be acquired in specified
@@ -298,7 +351,8 @@ class Processor(object):
        priority = None
  
      acquired = self.context.glm.acquire(level, names, shared=shared,
-                                        timeout=timeout, priority=priority)
+                                        timeout=timeout, priority=priority,
+                                        opportunistic=opportunistic)
  
      if acquired is None:
        raise LockAcquireTimeout()
@@ -353,9 +407,13 @@ class Processor(object):
      given LU and its opcodes.
  
      """
+    glm = self.context.glm
      adding_locks = level in lu.add_locks
      acquiring_locks = level in lu.needed_locks
+
      if level not in locking.LEVELS:
+      _VerifyLocks(lu, glm)
+
        if self._cbs:
          self._cbs.NotifyStart()
  
@@ -384,6 +442,7 @@ class Processor(object):
  
        lu.DeclareLocks(level)
        share = lu.share_locks[level]
+      opportunistic = lu.opportunistic_locks[level]
  
        try:
          assert adding_locks ^ acquiring_locks, \
@@ -393,7 +452,7 @@ class Processor(object):
            # Acquiring locks
            needed_locks = lu.needed_locks[level]
  
-          self._AcquireLocks(level, needed_locks, share,
+          self._AcquireLocks(level, needed_locks, share, opportunistic,
                               calc_timeout())
          else:
            # Adding locks
@@ -401,7 +460,7 @@ class Processor(object):
            lu.remove_locks[level] = add_locks
  
            try:
-            self.context.glm.add(level, add_locks, acquired=1, shared=share)
+            glm.add(level, add_locks, acquired=1, shared=share)
            except errors.LockError:
              logging.exception("Detected lock error in level %s for locks"
                                " %s, shared=%s", level, add_locks, share)
@@ -414,10 +473,10 @@ class Processor(object):
            result = self._LockAndExecLU(lu, level + 1, calc_timeout)
          finally:
            if level in lu.remove_locks:
-            self.context.glm.remove(level, lu.remove_locks[level])
+            glm.remove(level, lu.remove_locks[level])
        finally:
-        if self.context.glm.is_owned(level):
-          self.context.glm.release(level)
+        if glm.is_owned(level):
+          glm.release(level)
  
      else:
        result = self._LockAndExecLU(lu, level + 1, calc_timeout)
@@ -457,7 +516,7 @@ class Processor(object):
          # and in a shared fashion otherwise (to prevent concurrent run with
          # an exclusive LU.
          self._AcquireLocks(locking.LEVEL_CLUSTER, locking.BGL,
-                            not lu_class.REQ_BGL, calc_timeout())
+                            not lu_class.REQ_BGL, False, calc_timeout())
        elif lu_class.REQ_BGL:
          raise errors.ProgrammerError("Opcode '%s' requires BGL, but locks are"
                                       " disabled" % op.OP_ID)
@@ -468,7 +527,8 @@ class Processor(object):
          assert lu.needed_locks is not None, "needed_locks not set by LU"
  
          try:
-          result = self._LockAndExecLU(lu, locking.LEVEL_INSTANCE, calc_timeout)
+          result = self._LockAndExecLU(lu, locking.LEVEL_CLUSTER + 1,
+                                       calc_timeout)
          finally:
            if self._ec_id:
              self.context.cfg.DropECReservations(self._ec_id)
@@ -484,8 +544,12 @@ class Processor(object):
      if not (resultcheck_fn is None or resultcheck_fn(result)):
        logging.error("Expected opcode result matching %s, got %s",
                      resultcheck_fn, result)
-      raise errors.OpResultError("Opcode result does not match %s: %s" %
-                                 (resultcheck_fn, utils.Truncate(result, 80)))
+      if not getattr(op, "dry_run", False):
+        # FIXME: LUs should still behave in dry_run mode, or
+        # alternately we should have OP_DRYRUN_RESULT; in the
+        # meantime, we simply skip the OP_RESULT check in dry-run mode
+        raise errors.OpResultError("Opcode result does not match %s: %s" %
+                                   (resultcheck_fn, utils.Truncate(result, 80)))
  
      return result