bdev: Convert to utils.Retry
author Michael Hanselmann <hansmi@google.com>
Fri, 30 Oct 2009 16:38:24 +0000 (17:38 +0100)
committer Michael Hanselmann <hansmi@google.com>
Tue, 3 Nov 2009 10:51:35 +0000 (11:51 +0100)
Also replaces a hardcoded limit of 15 seconds with 1/4
of NET_RECONFIG_TIMEOUT.

Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Iustin Pop <iustin@google.com>

lib/bdev.py

index 595f7ae..0b95313 100644 (file)
@@ -1225,20 +1225,18 @@ class DRBD8(BaseDRBD):
       _ThrowError("drbd%d: can't setup network: %s - %s",
                   minor, result.fail_reason, result.output)
 
-    timeout = time.time() + 10
-    ok = False
-    while time.time() < timeout:
+    def _CheckNetworkConfig():
       info = cls._GetDevInfo(cls._GetShowData(minor))
       if not "local_addr" in info or not "remote_addr" in info:
-        time.sleep(1)
-        continue
+        raise utils.RetryAgain()
+
       if (info["local_addr"] != (lhost, lport) or
           info["remote_addr"] != (rhost, rport)):
-        time.sleep(1)
-        continue
-      ok = True
-      break
-    if not ok:
+        raise utils.RetryAgain()
+
+    try:
+      utils.Retry(_CheckNetworkConfig, 1.0, 10.0)
+    except utils.RetryTimeout:
       _ThrowError("drbd%d: timeout while configuring network", minor)
 
   def AddChildren(self, devices):
@@ -1431,31 +1429,42 @@ class DRBD8(BaseDRBD):
       _ThrowError("drbd%d: DRBD disk missing network info in"
                   " DisconnectNet()", self.minor)
 
-    ever_disconnected = _IgnoreError(self._ShutdownNet, self.minor)
-    timeout_limit = time.time() + self._NET_RECONFIG_TIMEOUT
-    sleep_time = 0.100 # we start the retry time at 100 milliseconds
-    while time.time() < timeout_limit:
-      status = self.GetProcStatus()
-      if status.is_standalone:
-        break
-      # retry the disconnect, it seems possible that due to a
-      # well-time disconnect on the peer, my disconnect command might
-      # be ignored and forgotten
-      ever_disconnected = _IgnoreError(self._ShutdownNet, self.minor) or \
-                          ever_disconnected
-      time.sleep(sleep_time)
-      sleep_time = min(2, sleep_time * 1.5)
+    class _DisconnectStatus:
+      def __init__(self, ever_disconnected):
+        self.ever_disconnected = ever_disconnected
 
-    if not status.is_standalone:
-      if ever_disconnected:
+    dstatus = _DisconnectStatus(_IgnoreError(self._ShutdownNet, self.minor))
+
+    def _WaitForDisconnect():
+      if self.GetProcStatus().is_standalone:
+        return
+
+      # retry the disconnect, it seems possible that due to a well-timed
+      # disconnect on the peer, my disconnect command might be ignored and
+      # forgotten
+      dstatus.ever_disconnected = \
+        _IgnoreError(self._ShutdownNet, self.minor) or dstatus.ever_disconnected
+
+      raise utils.RetryAgain()
+
+    # Keep start time
+    start_time = time.time()
+
+    try:
+      # Start delay at 100 milliseconds and grow up to 2 seconds
+      utils.Retry(_WaitForDisconnect, (0.1, 1.5, 2.0),
+                  self._NET_RECONFIG_TIMEOUT)
+    except utils.RetryTimeout:
+      if dstatus.ever_disconnected:
         msg = ("drbd%d: device did not react to the"
                " 'disconnect' command in a timely manner")
       else:
         msg = "drbd%d: can't shutdown network, even after multiple retries"
+
       _ThrowError(msg, self.minor)
 
-    reconfig_time = time.time() - timeout_limit + self._NET_RECONFIG_TIMEOUT
-    if reconfig_time > 15: # hardcoded alert limit
+    reconfig_time = time.time() - start_time
+    if reconfig_time > (self._NET_RECONFIG_TIMEOUT * 0.25):
       logging.info("drbd%d: DisconnectNet: detach took %.3f seconds",
                    self.minor, reconfig_time)