Reduce the chance of DRBD errors with stale primaries

author Iustin Pop <iustin@google.com>

Tue, 5 Feb 2008 13:31:58 +0000 (13:31 +0000)

committer Iustin Pop <iustin@google.com>

Tue, 5 Feb 2008 13:31:58 +0000 (13:31 +0000)
author Iustin Pop <iustin@google.com>
Tue, 5 Feb 2008 13:31:58 +0000 (13:31 +0000)
committer Iustin Pop <iustin@google.com>
Tue, 5 Feb 2008 13:31:58 +0000 (13:31 +0000)
diff --git a/lib/bdev.py b/lib/bdev.py

index 1552956..a655695 100644 (file)
--- a/lib/bdev.py
+++ b/lib/bdev.py
@@ -122,7 +122,13 @@ class BlockDev(object):
        status = status and child.Assemble()
        if not status:
          break
-      status = status and child.Open()
+
+      try:
+        child.Open()
+      except errors.BlockDeviceError:
+        for child in self._children:
+          child.Shutdown()
+        raise
  
      if not status:
        for child in self._children:
@@ -502,7 +508,7 @@ class LogicalVolume(BlockDev):
      This is a no-op for the LV device type.
  
      """
-    return True
+    pass
  
    def Close(self):
      """Notifies that the device will no longer be used for I/O.
@@ -510,7 +516,7 @@ class LogicalVolume(BlockDev):
      This is a no-op for the LV device type.
  
      """
-    return True
+    pass
  
    def Snapshot(self, size):
      """Create a snapshot copy of an lvm block device.
@@ -954,7 +960,7 @@ class MDRaid1(BlockDev):
      the 2.6.18's new array_state thing.
  
      """
-    return True
+    pass
  
    def Close(self):
      """Notifies that the device will no longer be used for I/O.
@@ -963,7 +969,7 @@ class MDRaid1(BlockDev):
      `Open()`.
  
      """
-    return True
+    pass
  
  
  class BaseDRBD(BlockDev):
@@ -1456,9 +1462,9 @@ class DRBDev(BaseDRBD):
        cmd.append("--do-what-I-say")
      result = utils.RunCmd(cmd)
      if result.failed:
-      logger.Error("Can't make drbd device primary: %s" % result.output)
-      return False
-    return True
+      msg = ("Can't make drbd device primary: %s" % result.output)
+      logger.Error(msg)
+      raise errors.BlockDeviceError(msg)
  
    def Close(self):
      """Make the local state secondary.
@@ -1471,8 +1477,10 @@ class DRBDev(BaseDRBD):
        raise errors.BlockDeviceError("Can't find device")
      result = utils.RunCmd(["drbdsetup", self.dev_path, "secondary"])
      if result.failed:
-      logger.Error("Can't switch drbd device to secondary: %s" % result.output)
-      raise errors.BlockDeviceError("Can't switch drbd device to secondary")
+      msg = ("Can't switch drbd device to"
+             " secondary: %s" % result.output)
+      logger.Error(msg)
+      raise errors.BlockDeviceError(msg)
  
    def SetSyncSpeed(self, kbytes):
      """Set the speed of the DRBD syncer.
@@ -2068,9 +2076,9 @@ class DRBD8(BaseDRBD):
        cmd.append("-o")
      result = utils.RunCmd(cmd)
      if result.failed:
-      logger.Error("Can't make drbd device primary: %s" % result.output)
-      return False
-    return True
+      msg = ("Can't make drbd device primary: %s" % result.output)
+      logger.Error(msg)
+      raise errors.BlockDeviceError(msg)
  
    def Close(self):
      """Make the local state secondary.
@@ -2083,8 +2091,10 @@ class DRBD8(BaseDRBD):
        raise errors.BlockDeviceError("Can't find device")
      result = utils.RunCmd(["drbdsetup", self.dev_path, "secondary"])
      if result.failed:
-      logger.Error("Can't switch drbd device to secondary: %s" % result.output)
-      raise errors.BlockDeviceError("Can't switch drbd device to secondary")
+      msg = ("Can't switch drbd device to"
+             " secondary: %s" % result.output)
+      logger.Error(msg)
+      raise errors.BlockDeviceError(msg)
  
    def Attach(self):
      """Find a DRBD device which matches our config and attach to it.
diff --git a/lib/cmdlib.py b/lib/cmdlib.py

index b45a83b..6bddebf 100644 (file)
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -1860,23 +1860,41 @@ def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
    """
    device_info = []
    disks_ok = True
+  iname = instance.name
+  # With the two-pass mechanism we try to reduce the window of
+  # opportunity for the race condition of switching DRBD to primary
+  # before handshaking occurred, but we do not eliminate it
+
+  # The proper fix would be to wait (with some limits) until the
+  # connection has been made and drbd transitions from WFConnection
+  # into any other network-connected state (Connected, SyncTarget,
+  # SyncSource, etc.)
+
+  # 1st pass, assemble on all nodes in secondary mode
    for inst_disk in instance.disks:
-    master_result = None
      for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
        cfg.SetDiskID(node_disk, node)
-      is_primary = node == instance.primary_node
-      result = rpc.call_blockdev_assemble(node, node_disk,
-                                          instance.name, is_primary)
+      result = rpc.call_blockdev_assemble(node, node_disk, iname, False)
        if not result:
          logger.Error("could not prepare block device %s on node %s"
-                     " (is_primary=%s)" %
-                     (inst_disk.iv_name, node, is_primary))
-        if is_primary or not ignore_secondaries:
+                     " (is_primary=False, pass=1)" % (inst_disk.iv_name, node))
+        if not ignore_secondaries:
            disks_ok = False
-      if is_primary:
-        master_result = result
-    device_info.append((instance.primary_node, inst_disk.iv_name,
-                        master_result))
+
+  # FIXME: race condition on drbd migration to primary
+
+  # 2nd pass, do only the primary node
+  for inst_disk in instance.disks:
+    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
+      if node != instance.primary_node:
+        continue
+      cfg.SetDiskID(node_disk, node)
+      result = rpc.call_blockdev_assemble(node, node_disk, iname, True)
+      if not result:
+        logger.Error("could not prepare block device %s on node %s"
+                     " (is_primary=True, pass=2)" % (inst_disk.iv_name, node))
+        disks_ok = False
+    device_info.append((instance.primary_node, inst_disk.iv_name, result))
  
    # leave the disks configured for the primary node
    # this is a workaround that would be fixed better by
author	Iustin Pop <iustin@google.com>
	Tue, 5 Feb 2008 13:31:58 +0000 (13:31 +0000)
committer	Iustin Pop <iustin@google.com>
	Tue, 5 Feb 2008 13:31:58 +0000 (13:31 +0000)
lib/bdev.py		patch \| blob \| history
lib/cmdlib.py		patch \| blob \| history