status = status and child.Assemble()
if not status:
break
- status = status and child.Open()
+
+ try:
+ child.Open()
+ except errors.BlockDeviceError:
+ for child in self._children:
+ child.Shutdown()
+ raise
if not status:
for child in self._children:
status of the mirror.
Returns:
- (sync_percent, estimated_time, is_degraded)
+ (sync_percent, estimated_time, is_degraded, ldisk)
+
+ If sync_percent is None, it means the device is not syncing.
- If sync_percent is None, it means all is ok
If estimated_time is None, it means we can't estimate
- the time needed, otherwise it's the time left in seconds
+ the time needed, otherwise it's the time left in seconds.
+
If is_degraded is True, it means the device is missing
redundancy. This is usually a sign that something went wrong in
the device setup, if sync_percent is None.
+ The ldisk parameter represents the degradation of the local
+ data. This is only valid for some devices, the rest will always
+ return False (not degraded).
+
"""
- return None, None, False
+ return None, None, False, False
def CombinedSyncStatus(self):
children.
"""
- min_percent, max_time, is_degraded = self.GetSyncStatus()
+ min_percent, max_time, is_degraded, ldisk = self.GetSyncStatus()
if self._children:
for child in self._children:
- c_percent, c_time, c_degraded = child.GetSyncStatus()
+ c_percent, c_time, c_degraded, c_ldisk = child.GetSyncStatus()
if min_percent is None:
min_percent = c_percent
elif c_percent is not None:
elif c_time is not None:
max_time = max(max_time, c_time)
is_degraded = is_degraded or c_degraded
- return min_percent, max_time, is_degraded
+ ldisk = ldisk or c_ldisk
+ return min_percent, max_time, is_degraded, ldisk
def SetInfo(self, text):
self._lv_name = new_name
self.dev_path = "/dev/%s/%s" % (self._vg_name, self._lv_name)
-
def Attach(self):
"""Attach to an existing LV.
This method will try to see if an existing and active LV exists
- which matches the our name. If so, its major/minor will be
+ which matches our name. If so, its major/minor will be
recorded.
"""
result = utils.RunCmd(["lvdisplay", self.dev_path])
if result.failed:
- logger.Error("Can't find LV %s: %s" %
- (self.dev_path, result.fail_reason))
+ logger.Error("Can't find LV %s: %s, %s" %
+ (self.dev_path, result.fail_reason, result.output))
return False
match = re.compile("^ *Block device *([0-9]+):([0-9]+).*$")
for line in result.stdout.splitlines():
def Assemble(self):
"""Assemble the device.
- This is a no-op for the LV device type. Eventually, we could
- lvchange -ay here if we see that the LV is not active.
+ We always run `lvchange -ay` on the LV to ensure it's active before
+ use, as there were cases when xenvg was not active after boot
+ (also possibly after disk issues).
"""
- return True
+ result = utils.RunCmd(["lvchange", "-ay", self.dev_path])
+ if result.failed:
+ logger.Error("Can't activate lv %s: %s" % (self.dev_path, result.output))
+ return not result.failed
def Shutdown(self):
"""Shutdown the device.
return retval
+ def GetSyncStatus(self):
+ """Returns the sync status of the device.
+
+ If this device is a mirroring device, this function returns the
+ status of the mirror.
+
+ Returns:
+ (sync_percent, estimated_time, is_degraded, ldisk)
+
+ For logical volumes, sync_percent and estimated_time are always
+ None (no recovery in progress, as we don't handle the mirrored LV
+ case). The is_degraded parameter always has the same value as the
+ ldisk parameter.
+
+ For the ldisk parameter, we check if the logical volume has the
+ 'virtual' type, which means it's not backed by existing storage
+ anymore (reads from it return an I/O error). This happens after a
+ physical disk failure and subsequent 'vgreduce --removemissing' on
+ the volume group.
+
+ If the `lvs` command fails, or its attribute string does not have
+ the expected six characters, the volume is reported as degraded
+ (both is_degraded and ldisk are True).
+
+ """
+ result = utils.RunCmd(["lvs", "--noheadings", "-olv_attr", self.dev_path])
+ if result.failed:
+ # we can't query the LV, so pessimistically report it as degraded
+ logger.Error("Can't display lv: %s" % result.fail_reason)
+ return None, None, True, True
+ out = result.stdout.strip()
+ # format: type/permissions/alloc/fixed_minor/state/open
+ if len(out) != 6:
+ # unexpected attribute string: also treated as degraded
+ logger.Debug("Error in lvs output: attrs=%s, len != 6" % out)
+ return None, None, True, True
+ ldisk = out[0] == 'v' # virtual volume, i.e. doesn't have
+ # backing storage
+ return None, None, ldisk, ldisk
+
def Open(self, force=False):
"""Make the device ready for I/O.
This is a no-op for the LV device type.
"""
- return True
+ pass
def Close(self):
"""Notifies that the device will no longer be used for I/O.
This is a no-op for the LV device type.
"""
- return True
+ pass
def Snapshot(self, size):
"""Create a snapshot copy of an lvm block device.
"""Returns the sync status of the device.
Returns:
- (sync_percent, estimated_time, is_degraded)
+ (sync_percent, estimated_time, is_degraded, ldisk)
If sync_percent is None, it means all is ok
If estimated_time is None, it means we can't esimate
- the time needed, otherwise it's the time left in seconds
+ the time needed, otherwise it's the time left in seconds.
+
+ The ldisk parameter is always False for MD devices.
"""
if self.minor is None and not self.Attach():
sync_status = f.readline().strip()
f.close()
if sync_status == "idle":
- return None, None, not is_clean
+ return None, None, not is_clean, False
f = file(sys_path + "sync_completed")
sync_completed = f.readline().strip().split(" / ")
f.close()
if len(sync_completed) != 2:
- return 0, None, not is_clean
+ return 0, None, not is_clean, False
sync_done, sync_total = [float(i) for i in sync_completed]
sync_percent = 100.0*sync_done/sync_total
f = file(sys_path + "sync_speed")
time_est = None
else:
time_est = (sync_total - sync_done) / 2 / sync_speed_k
- return sync_percent, time_est, not is_clean
+ return sync_percent, time_est, not is_clean, False
def Open(self, force=False):
"""Make the device ready for I/O.
the 2.6.18's new array_state thing.
"""
- return True
+ pass
def Close(self):
"""Notifies that the device will no longer be used for I/O.
`Open()`.
"""
- return True
+ pass
class BaseDRBD(BlockDev):
"""
_VERSION_RE = re.compile(r"^version: (\d+)\.(\d+)\.(\d+)"
- r" \(api:(\d+)/proto:(\d+)\)")
+ r" \(api:(\d+)/proto:(\d+)(?:-(\d+))?\)")
+
_DRBD_MAJOR = 147
_ST_UNCONFIGURED = "Unconfigured"
_ST_WFCONNECTION = "WFConnection"
def _GetVersion(cls):
"""Return the DRBD version.
- This will return a list [k_major, k_minor, k_point, api, proto].
+ This will return a dict with keys:
+ k_major,
+ k_minor,
+ k_point,
+ api,
+ proto,
+ proto2 (only on drbd > 8.2.X)
"""
proc_data = cls._GetProcData()
if not version:
raise errors.BlockDeviceError("Can't parse DRBD version from '%s'" %
first_line)
- return [int(val) for val in version.groups()]
+
+ values = version.groups()
+ retval = {'k_major': int(values[0]),
+ 'k_minor': int(values[1]),
+ 'k_point': int(values[2]),
+ 'api': int(values[3]),
+ 'proto': int(values[4]),
+ }
+ if values[5] is not None:
+ retval['proto2'] = values[5]
+
+ return retval
@staticmethod
def _DevPath(minor):
def __init__(self, unique_id, children):
super(DRBDev, self).__init__(unique_id, children)
self.major = self._DRBD_MAJOR
- [kmaj, kmin, kfix, api, proto] = self._GetVersion()
- if kmaj != 0 and kmin != 7:
+ version = self._GetVersion()
+ if version['k_major'] != 0 and version['k_minor'] != 7:
raise errors.BlockDeviceError("Mismatch in DRBD kernel version and"
" requested ganeti usage: kernel is"
- " %s.%s, ganeti wants 0.7" % (kmaj, kmin))
-
+ " %s.%s, ganeti wants 0.7" %
+ (version['k_major'], version['k_minor']))
if len(children) != 2:
raise ValueError("Invalid configuration data %s" % str(children))
if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 4:
"""
result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "disconnect"])
- logger.Error("Can't shutdown network: %s" % result.output)
+ if result.failed:
+ logger.Error("Can't shutdown network: %s" % result.output)
return not result.failed
def Assemble(self):
cmd.append("--do-what-I-say")
result = utils.RunCmd(cmd)
if result.failed:
- logger.Error("Can't make drbd device primary: %s" % result.output)
- return False
- return True
+ msg = ("Can't make drbd device primary: %s" % result.output)
+ logger.Error(msg)
+ raise errors.BlockDeviceError(msg)
def Close(self):
"""Make the local state secondary.
raise errors.BlockDeviceError("Can't find device")
result = utils.RunCmd(["drbdsetup", self.dev_path, "secondary"])
if result.failed:
- logger.Error("Can't switch drbd device to secondary: %s" % result.output)
- raise errors.BlockDeviceError("Can't switch drbd device to secondary")
+ msg = ("Can't switch drbd device to"
+ " secondary: %s" % result.output)
+ logger.Error(msg)
+ raise errors.BlockDeviceError(msg)
def SetSyncSpeed(self, kbytes):
"""Set the speed of the DRBD syncer.
"""Returns the sync status of the device.
Returns:
- (sync_percent, estimated_time, is_degraded)
+ (sync_percent, estimated_time, is_degraded, ldisk)
If sync_percent is None, it means all is ok
If estimated_time is None, it means we can't esimate
- the time needed, otherwise it's the time left in seconds
+ the time needed, otherwise it's the time left in seconds.
+
+ The ldisk parameter will be returned as False, since the DRBD7
+ devices have not been converted.
"""
if self.minor is None and not self.Attach():
self.minor)
client_state = match.group(1)
is_degraded = client_state != "Connected"
- return sync_percent, est_time, is_degraded
+ return sync_percent, est_time, is_degraded, False
def GetStatus(self):
"""Compute the status of the DRBD device
_PARSE_SHOW = None
def __init__(self, unique_id, children):
+ if children and children.count(None) > 0:
+ children = []
super(DRBD8, self).__init__(unique_id, children)
self.major = self._DRBD_MAJOR
- [kmaj, kmin, kfix, api, proto] = self._GetVersion()
- if kmaj != 8:
+ version = self._GetVersion()
+ if version['k_major'] != 8 :
raise errors.BlockDeviceError("Mismatch in DRBD kernel version and"
" requested ganeti usage: kernel is"
- " %s.%s, ganeti wants 8.x" % (kmaj, kmin))
+ " %s.%s, ganeti wants 8.x" %
+ (version['k_major'], version['k_minor']))
if len(children) not in (0, 2):
raise ValueError("Invalid configuration data %s" % str(children))
rbrace = pyp.Literal("}").suppress()
semi = pyp.Literal(";").suppress()
# this also converts the value to an int
- number = pyp.Word(pyp.nums).setParseAction(lambda s, l, t:(l, [int(t[0])]))
+ number = pyp.Word(pyp.nums).setParseAction(lambda s, l, t: int(t[0]))
comment = pyp.Literal ("#") + pyp.Optional(pyp.restOfLine)
defa = pyp.Literal("_is_default").suppress()
return bnf
@classmethod
- def _GetDevInfo(cls, minor):
- """Get details about a given DRBD minor.
-
- This return, if available, the local backing device (as a path)
- and the local and remote (ip, port) information.
+ def _GetShowData(cls, minor):
+ """Return the `drbdsetup show` data for a minor.
"""
- data = {}
result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "show"])
if result.failed:
logger.Error("Can't display the drbd config: %s" % result.fail_reason)
- return data
- out = result.stdout
+ return None
+ return result.stdout
+
+ @classmethod
+ def _GetDevInfo(cls, out):
+ """Parse details about a given DRBD minor.
+
+ This returns, if available, the local backing device (as a path)
+ and the local and remote (ip, port) information from a string
+ containing the output of the `drbdsetup show` command as returned
+ by _GetShowData.
+
+ """
+ data = {}
if not out:
return data
"""
lhost, lport, rhost, rport = net_info
+ if None in net_info:
+ # we don't want a network connection and actually want to make
+ # sure it's shut down
+ return cls._ShutdownNet(minor)
+
args = ["drbdsetup", cls._DevPath(minor), "net",
"%s:%s" % (lhost, lport), "%s:%s" % (rhost, rport), protocol,
"-A", "discard-zero-changes",
timeout = time.time() + 10
ok = False
while time.time() < timeout:
- info = cls._GetDevInfo(minor)
+ info = cls._GetDevInfo(cls._GetShowData(minor))
if not "local_addr" in info or not "remote_addr" in info:
time.sleep(1)
continue
"""
if self.minor is None:
raise errors.BlockDeviceError("Can't attach to dbrd8 during AddChildren")
-
if len(devices) != 2:
raise errors.BlockDeviceError("Need two devices for AddChildren")
- if self._children:
+ info = self._GetDevInfo(self._GetShowData(self.minor))
+ if "local_dev" in info:
raise errors.BlockDeviceError("DRBD8 already attached to a local disk")
backend, meta = devices
if backend.dev_path is None or meta.dev_path is None:
if self.minor is None:
raise errors.BlockDeviceError("Can't attach to drbd8 during"
" RemoveChildren")
+ # early return if we don't actually have backing storage
+ info = self._GetDevInfo(self._GetShowData(self.minor))
+ if "local_dev" not in info:
+ return
if len(self._children) != 2:
raise errors.BlockDeviceError("We don't have two children: %s" %
self._children)
If sync_percent is None, it means all is ok
If estimated_time is None, it means we can't esimate
- the time needed, otherwise it's the time left in seconds
+ the time needed, otherwise it's the time left in seconds.
+
+
+ We set the is_degraded parameter to True on two conditions:
+ network not connected or local disk missing.
+
+ We compute the ldisk parameter based on whether we have a local
+ disk or not.
"""
if self.minor is None and not self.Attach():
self.minor)
client_state = match.group(1)
local_disk_state = match.group(2)
- is_degraded = (client_state != "Connected" or
- local_disk_state != "UpToDate")
- return sync_percent, est_time, is_degraded
+ ldisk = local_disk_state != "UpToDate"
+ is_degraded = client_state != "Connected"
+ return sync_percent, est_time, is_degraded or ldisk, ldisk
def GetStatus(self):
"""Compute the status of the DRBD device
cmd.append("-o")
result = utils.RunCmd(cmd)
if result.failed:
- logger.Error("Can't make drbd device primary: %s" % result.output)
- return False
- return True
+ msg = ("Can't make drbd device primary: %s" % result.output)
+ logger.Error(msg)
+ raise errors.BlockDeviceError(msg)
def Close(self):
"""Make the local state secondary.
raise errors.BlockDeviceError("Can't find device")
result = utils.RunCmd(["drbdsetup", self.dev_path, "secondary"])
if result.failed:
- logger.Error("Can't switch drbd device to secondary: %s" % result.output)
- raise errors.BlockDeviceError("Can't switch drbd device to secondary")
+ msg = ("Can't switch drbd device to"
+ " secondary: %s" % result.output)
+ logger.Error(msg)
+ raise errors.BlockDeviceError(msg)
def Attach(self):
"""Find a DRBD device which matches our config and attach to it.
"""
for minor in self._GetUsedDevs():
- info = self._GetDevInfo(minor)
+ info = self._GetDevInfo(self._GetShowData(minor))
match_l = self._MatchesLocal(info)
match_r = self._MatchesNet(info)
if match_l and match_r:
(self._lhost, self._lport,
self._rhost, self._rport),
"C")
- if res_r and self._MatchesNet(self._GetDevInfo(minor)):
+ if res_r:
+ if self._MatchesNet(self._GetDevInfo(self._GetShowData(minor))):
+ break
+ # the weakest case: we find something that is only net attached
+ # even though we were passed some children at init time
+ if match_r and "local_dev" not in info:
+ break
+ if match_l and not match_r and "local_addr" in info:
+ # strange case - the device network part points to somewhere
+ # else, even though its local storage is ours; as we own the
+ # drbd space, we try to disconnect from the remote peer and
+ # reconnect to our correct one
+ if not self._ShutdownNet(minor):
+ raise errors.BlockDeviceError("Device has correct local storage,"
+ " wrong remote peer and is unable to"
+ " disconnect in order to attach to"
+ " the correct peer")
+ # note: _AssembleNet also handles the case when we don't want
+ # a network connection (i.e. one or more of the _[lr](host|port)
+ # is None)
+ if (self._AssembleNet(minor, (self._lhost, self._lport,
+ self._rhost, self._rport), "C") and
+ self._MatchesNet(self._GetDevInfo(self._GetShowData(minor)))):
break
+
else:
minor = None
minor = self._FindUnusedMinor()
need_localdev_teardown = False
- if self._children[0]:
+ if self._children and self._children[0] and self._children[1]:
result = self._AssembleLocal(minor, self._children[0].dev_path,
self._children[1].dev_path)
if not result:
"""
result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "disconnect"])
- logger.Error("Can't shutdown network: %s" % result.output)
+ if result.failed:
+ logger.Error("Can't shutdown network: %s" % result.output)
return not result.failed
@classmethod
self.dev_path = None
return True
- def Rename(self, new_uid):
- """Re-connect this device to another peer.
-
- """
- if self.minor is None:
- raise errors.BlockDeviceError("Device not attached during rename")
- if self._rhost is not None:
- # this means we did have a host when we attached, so we are connected
- if not self._ShutdownNet(self.minor):
- raise errors.BlockDeviceError("Can't disconnect from remote peer")
- old_id = self.unique_id
- else:
- old_id = None
- self.unique_id = new_uid
- if not self._AssembleNet(self.minor, self.unique_id, "C"):
- logger.Error("Can't attach to new peer!")
- if old_id is not None:
- self._AssembleNet(self.minor, old_id, "C")
- self.unique_id = old_id
- raise errors.BlockDeviceError("Can't attach to new peer")
-
def Remove(self):
"""Stub remove for DRBD devices.
device = DEV_MAP[dev_type](unique_id, children)
if not device.Attach():
device.Assemble()
- if not device.Attach():
- raise errors.BlockDeviceError("Can't find a valid block device for"
- " %s/%s/%s" %
- (dev_type, unique_id, children))
+ if not device.Attach():
+ raise errors.BlockDeviceError("Can't find a valid block device for"
+ " %s/%s/%s" %
+ (dev_type, unique_id, children))
return device