Revision def8e2f6
b/lib/bdev.py | ||
---|---|---|
1225 | 1225 |
_ThrowError("drbd%d: can't setup network: %s - %s", |
1226 | 1226 |
minor, result.fail_reason, result.output) |
1227 | 1227 |
|
1228 |
timeout = time.time() + 10 |
|
1229 |
ok = False |
|
1230 |
while time.time() < timeout: |
|
1228 |
def _CheckNetworkConfig(): |
|
1231 | 1229 |
info = cls._GetDevInfo(cls._GetShowData(minor)) |
1232 | 1230 |
if not "local_addr" in info or not "remote_addr" in info: |
1233 |
time.sleep(1)
|
|
1234 |
continue |
|
1231 |
raise utils.RetryAgain()
|
|
1232 |
|
|
1235 | 1233 |
if (info["local_addr"] != (lhost, lport) or |
1236 | 1234 |
info["remote_addr"] != (rhost, rport)): |
1237 |
time.sleep(1)
|
|
1238 |
continue |
|
1239 |
ok = True
|
|
1240 |
break
|
|
1241 |
if not ok:
|
|
1235 |
raise utils.RetryAgain()
|
|
1236 |
|
|
1237 |
try:
|
|
1238 |
utils.Retry(_CheckNetworkConfig, 1.0, 10.0)
|
|
1239 |
except utils.RetryTimeout:
|
|
1242 | 1240 |
_ThrowError("drbd%d: timeout while configuring network", minor) |
1243 | 1241 |
|
1244 | 1242 |
def AddChildren(self, devices): |
... | ... | |
1431 | 1429 |
_ThrowError("drbd%d: DRBD disk missing network info in" |
1432 | 1430 |
" DisconnectNet()", self.minor) |
1433 | 1431 |
|
1434 |
ever_disconnected = _IgnoreError(self._ShutdownNet, self.minor) |
|
1435 |
timeout_limit = time.time() + self._NET_RECONFIG_TIMEOUT |
|
1436 |
sleep_time = 0.100 # we start the retry time at 100 milliseconds |
|
1437 |
while time.time() < timeout_limit: |
|
1438 |
status = self.GetProcStatus() |
|
1439 |
if status.is_standalone: |
|
1440 |
break |
|
1441 |
# retry the disconnect, it seems possible that due to a |
|
1442 |
# well-timed disconnect on the peer, my disconnect command might |
|
1443 |
# be ignored and forgotten |
|
1444 |
ever_disconnected = _IgnoreError(self._ShutdownNet, self.minor) or \ |
|
1445 |
ever_disconnected |
|
1446 |
time.sleep(sleep_time) |
|
1447 |
sleep_time = min(2, sleep_time * 1.5) |
|
1432 |
class _DisconnectStatus: |
|
1433 |
def __init__(self, ever_disconnected): |
|
1434 |
self.ever_disconnected = ever_disconnected |
|
1448 | 1435 |
|
1449 |
if not status.is_standalone: |
|
1450 |
if ever_disconnected: |
|
1436 |
dstatus = _DisconnectStatus(_IgnoreError(self._ShutdownNet, self.minor)) |
|
1437 |
|
|
1438 |
def _WaitForDisconnect(): |
|
1439 |
if self.GetProcStatus().is_standalone: |
|
1440 |
return |
|
1441 |
|
|
1442 |
# retry the disconnect, it seems possible that due to a well-timed |
|
1443 |
# disconnect on the peer, my disconnect command might be ignored and |
|
1444 |
# forgotten |
|
1445 |
dstatus.ever_disconnected = \ |
|
1446 |
_IgnoreError(self._ShutdownNet, self.minor) or dstatus.ever_disconnected |
|
1447 |
|
|
1448 |
raise utils.RetryAgain() |
|
1449 |
|
|
1450 |
# Keep start time |
|
1451 |
start_time = time.time() |
|
1452 |
|
|
1453 |
try: |
|
1454 |
# Start delay at 100 milliseconds and grow up to 2 seconds |
|
1455 |
utils.Retry(_WaitForDisconnect, (0.1, 1.5, 2.0), |
|
1456 |
self._NET_RECONFIG_TIMEOUT) |
|
1457 |
except utils.RetryTimeout: |
|
1458 |
if dstatus.ever_disconnected: |
|
1451 | 1459 |
msg = ("drbd%d: device did not react to the" |
1452 | 1460 |
" 'disconnect' command in a timely manner") |
1453 | 1461 |
else: |
1454 | 1462 |
msg = "drbd%d: can't shutdown network, even after multiple retries" |
1463 |
|
|
1455 | 1464 |
_ThrowError(msg, self.minor) |
1456 | 1465 |
|
1457 |
reconfig_time = time.time() - timeout_limit + self._NET_RECONFIG_TIMEOUT
|
|
1458 |
if reconfig_time > 15: # hardcoded alert limit
|
|
1466 |
reconfig_time = time.time() - start_time
|
|
1467 |
if reconfig_time > (self._NET_RECONFIG_TIMEOUT * 0.25):
|
|
1459 | 1468 |
logging.info("drbd%d: DisconnectNet: detach took %.3f seconds", |
1460 | 1469 |
self.minor, reconfig_time) |
1461 | 1470 |
|
Also available in: Unified diff