from ganeti import constants
+def _IgnoreError(fn, *args, **kwargs):
+ """Executes the given function, ignoring BlockDeviceErrors.
+
+ This is used in order to simplify the execution of cleanup or
+ rollback functions.
+
+ @rtype: boolean
+ @return: True when fn didn't raise an exception, False otherwise
+
+ """
+ try:
+ fn(*args, **kwargs)
+ return True
+ except errors.BlockDeviceError, err:
+ logging.warning("Caught BlockDeviceError but ignoring: %s" % str(err))
+ return False
+
+
+def _ThrowError(msg, *args):
+ """Log an error to the node daemon and then raise an exception.
+
+ @type msg: string
+ @param msg: the text of the exception
+ @raise errors.BlockDeviceError
+
+ """
+ if args:
+ msg = msg % args
+ logging.error(msg)
+ raise errors.BlockDeviceError(msg)
+
+
class BlockDev(object):
"""Block device abstract class.
def Assemble(self):
"""Assemble the device from its components.
- If this is a plain block device (e.g. LVM) than assemble does
- nothing, as the LVM has no children and we don't put logical
- volumes offline.
-
- One guarantee is that after the device has been assembled, it
- knows its major/minor numbers. This allows other devices (usually
- parents) to probe correctly for their children.
+ Implementations of this method by child classes must ensure that:
+ - after the device has been assembled, it knows its major/minor
+ numbers; this allows other devices (usually parents) to probe
+ correctly for their children
+ - calling this method on an existing, in-use device is safe
+ - if the device is already configured (and in an OK state),
+ this method is idempotent
"""
- status = True
- for child in self._children:
- if not isinstance(child, BlockDev):
- raise TypeError("Invalid child passed of type '%s'" % type(child))
- if not status:
- break
- status = status and child.Assemble()
- if not status:
- break
-
- try:
- child.Open()
- except errors.BlockDeviceError:
- for child in self._children:
- child.Shutdown()
- raise
-
- if not status:
- for child in self._children:
- child.Shutdown()
- return status
+ pass
def Attach(self):
"""Find a device which matches our config and attach to it.
If this device is a mirroring device, this function returns the
status of the mirror.
- Returns:
- (sync_percent, estimated_time, is_degraded, ldisk)
-
If sync_percent is None, it means the device is not syncing.
If estimated_time is None, it means we can't estimate
data. This is only valid for some devices, the rest will always
return False (not degraded).
+ @rtype: tuple
+ @return: (sync_percent, estimated_time, is_degraded, ldisk)
+
"""
return None, None, False, False
def Grow(self, amount):
"""Grow the block device.
- Arguments:
- amount: the amount (in mebibytes) to grow with
-
- Returns: None
+ @param amount: the amount (in mebibytes) to grow with
"""
raise NotImplementedError
"""
if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2:
- raise ValueError("Invalid configuration data %s" % str(unique_id))
+ raise errors.ProgrammerError("Invalid configuration data %s" %
+ str(unique_id))
vg_name, lv_name = unique_id
pvs_info = cls.GetPVInfo(vg_name)
if not pvs_info:
- raise errors.BlockDeviceError("Can't compute PV info for vg %s" %
- vg_name)
+ _ThrowError("Can't compute PV info for vg %s", vg_name)
pvs_info.sort()
pvs_info.reverse()
# The size constraint should have been checked from the master before
# calling the create function.
if free_size < size:
- raise errors.BlockDeviceError("Not enough free space: required %s,"
- " available %s" % (size, free_size))
+ _ThrowError("Not enough free space: required %s,"
+ " available %s", size, free_size)
result = utils.RunCmd(["lvcreate", "-L%dm" % size, "-n%s" % lv_name,
vg_name] + pvlist)
if result.failed:
- raise errors.BlockDeviceError("%s - %s" % (result.fail_reason,
- result.output))
+ _ThrowError("LV create failed (%s): %s",
+ result.fail_reason, result.output)
return LogicalVolume(unique_id, children)
@staticmethod
def GetPVInfo(vg_name):
"""Get the free space info for PVs in a volume group.
- Args:
- vg_name: the volume group name
+ @param vg_name: the volume group name
- Returns:
- list of (free_space, name) with free_space in mebibytes
+ @rtype: list
+ @return: list of tuples (free_space, name) with free_space in mebibytes
"""
command = ["pvs", "--noheadings", "--nosuffix", "--units=m",
"""
if not self.minor and not self.Attach():
# the LV does not exist
- return True
+ return
result = utils.RunCmd(["lvremove", "-f", "%s/%s" %
(self._vg_name, self._lv_name)])
if result.failed:
- logging.error("Can't lvremove: %s - %s",
- result.fail_reason, result.output)
-
- return not result.failed
+ _ThrowError("Can't lvremove: %s - %s", result.fail_reason, result.output)
def Rename(self, new_id):
"""Rename this logical volume.
(self._vg_name, new_vg))
result = utils.RunCmd(["lvrename", new_vg, self._lv_name, new_name])
if result.failed:
- raise errors.BlockDeviceError("Failed to rename the logical volume: %s" %
- result.output)
+ _ThrowError("Failed to rename the logical volume: %s", result.output)
self._lv_name = new_name
self.dev_path = "/dev/%s/%s" % (self._vg_name, self._lv_name)
"""
result = utils.RunCmd(["lvchange", "-ay", self.dev_path])
if result.failed:
- logging.error("Can't activate lv %s: %s", self.dev_path, result.output)
- return False
- return self.Attach()
+ _ThrowError("Can't activate lv %s: %s", self.dev_path, result.output)
def Shutdown(self):
"""Shutdown the device.
volumes on shutdown.
"""
- return True
+ pass
def GetSyncStatus(self):
"""Returns the sync status of the device.
If this device is a mirroring device, this function returns the
status of the mirror.
- Returns:
- (sync_percent, estimated_time, is_degraded, ldisk)
-
For logical volumes, sync_percent and estimated_time are always
None (no recovery in progress, as we don't handle the mirrored LV
case). The is_degraded parameter is the inverse of the ldisk
The status was already read in Attach, so we just return it.
+ @rtype: tuple
+ @return: (sync_percent, estimated_time, is_degraded, ldisk)
+
"""
return None, None, self._degraded, self._degraded
# remove existing snapshot if found
snap = LogicalVolume((self._vg_name, snap_name), None)
- snap.Remove()
+ _IgnoreError(snap.Remove)
pvs_info = self.GetPVInfo(self._vg_name)
if not pvs_info:
- raise errors.BlockDeviceError("Can't compute PV info for vg %s" %
- self._vg_name)
+ _ThrowError("Can't compute PV info for vg %s", self._vg_name)
pvs_info.sort()
pvs_info.reverse()
free_size, pv_name = pvs_info[0]
if free_size < size:
- raise errors.BlockDeviceError("Not enough free space: required %s,"
- " available %s" % (size, free_size))
+ _ThrowError("Not enough free space: required %s,"
+ " available %s", size, free_size)
result = utils.RunCmd(["lvcreate", "-L%dm" % size, "-s",
"-n%s" % snap_name, self.dev_path])
if result.failed:
- raise errors.BlockDeviceError("command: %s error: %s - %s" %
- (result.cmd, result.fail_reason,
- result.output))
+ _ThrowError("command: %s error: %s - %s",
+ result.cmd, result.fail_reason, result.output)
return snap_name
result = utils.RunCmd(["lvchange", "--addtag", text,
self.dev_path])
if result.failed:
- raise errors.BlockDeviceError("Command: %s error: %s - %s" %
- (result.cmd, result.fail_reason,
- result.output))
+ _ThrowError("Command: %s error: %s - %s", result.cmd, result.fail_reason,
+ result.output)
+
def Grow(self, amount):
"""Grow the logical volume.
"-L", "+%dm" % amount, self.dev_path])
if not result.failed:
return
- raise errors.BlockDeviceError("Can't grow LV %s: %s" %
- (self.dev_path, result.output))
+ _ThrowError("Can't grow LV %s: %s", self.dev_path, result.output)
class DRBD8Status(object):
Note that this doesn't support unconfigured devices (cs:Unconfigured).
"""
- LINE_RE = re.compile(r"\s*[0-9]+:\s*cs:(\S+)\s+st:([^/]+)/(\S+)"
+ UNCONF_RE = re.compile(r"\s*[0-9]+:\s*cs:Unconfigured$")
+ LINE_RE = re.compile(r"\s*[0-9]+:\s*cs:(\S+)\s+(?:st|ro):([^/]+)/(\S+)"
"\s+ds:([^/]+)/(\S+)\s+.*$")
SYNC_RE = re.compile(r"^.*\ssync'ed:\s*([0-9.]+)%.*"
"\sfinish: ([0-9]+):([0-9]+):([0-9]+)\s.*$")
def __init__(self, procline):
- m = self.LINE_RE.match(procline)
- if not m:
- raise errors.BlockDeviceError("Can't parse input data '%s'" % procline)
- self.cstatus = m.group(1)
- self.lrole = m.group(2)
- self.rrole = m.group(3)
- self.ldisk = m.group(4)
- self.rdisk = m.group(5)
+ u = self.UNCONF_RE.match(procline)
+ if u:
+ self.cstatus = "Unconfigured"
+ self.lrole = self.rrole = self.ldisk = self.rdisk = None
+ else:
+ m = self.LINE_RE.match(procline)
+ if not m:
+ raise errors.BlockDeviceError("Can't parse input data '%s'" % procline)
+ self.cstatus = m.group(1)
+ self.lrole = m.group(2)
+ self.rrole = m.group(3)
+ self.ldisk = m.group(4)
+ self.rdisk = m.group(5)
+
+ # end reading of data from the LINE_RE or UNCONF_RE
self.is_standalone = self.cstatus == "StandAlone"
self.is_wfconn = self.cstatus == "WFConnection"
self.is_diskless = self.ldisk == "Diskless"
self.is_disk_uptodate = self.ldisk == "UpToDate"
+ self.is_in_resync = self.cstatus in ("SyncSource", "SyncTarget")
+ self.is_in_use = self.cstatus != "Unconfigured"
+
m = self.SYNC_RE.match(procline)
if m:
self.sync_percent = float(m.group(1))
"""Return data from /proc/drbd.
"""
- stat = open(filename, "r")
try:
- data = stat.read().splitlines()
- finally:
- stat.close()
+ stat = open(filename, "r")
+ try:
+ data = stat.read().splitlines()
+ finally:
+ stat.close()
+ except EnvironmentError, err:
+ if err.errno == errno.ENOENT:
+ _ThrowError("The file %s cannot be opened, check if the module"
+ " is loaded (%s)", filename, str(err))
+ else:
+ _ThrowError("Can't read the DRBD proc file %s: %s", filename, str(err))
if not data:
- raise errors.BlockDeviceError("Can't read any data from %s" % filename)
+ _ThrowError("Can't read any data from %s", filename)
return data
@staticmethod
def _MassageProcData(data):
"""Transform the output of _GetProdData into a nicer form.
- Returns:
- a dictionary of minor: joined lines from /proc/drbd for that minor
+ @return: a dictionary of minor: joined lines from /proc/drbd
+ for that minor
"""
lmatch = re.compile("^ *([0-9]+):.*$")
"""Return the DRBD version.
This will return a dict with keys:
- k_major,
- k_minor,
- k_point,
- api,
- proto,
- proto2 (only on drbd > 8.2.X)
+ - k_major
+ - k_minor
+ - k_point
+ - api
+ - proto
+ - proto2 (only on drbd > 8.2.X)
"""
proc_data = cls._GetProcData()
return "/dev/drbd%d" % minor
@classmethod
- def _GetUsedDevs(cls):
+ def GetUsedDevs(cls):
"""Compute the list of used DRBD devices.
"""
"""
result = utils.RunCmd(["blockdev", "--getsize", meta_device])
if result.failed:
- logging.error("Failed to get device size: %s - %s",
- result.fail_reason, result.output)
- return False
+ _ThrowError("Failed to get device size: %s - %s",
+ result.fail_reason, result.output)
try:
sectors = int(result.stdout)
except ValueError:
- logging.error("Invalid output from blockdev: '%s'", result.stdout)
- return False
+ _ThrowError("Invalid output from blockdev: '%s'", result.stdout)
bytes = sectors * 512
if bytes < 128 * 1024 * 1024: # less than 128MiB
- logging.error("Meta device too small (%.2fMib)", (bytes / 1024 / 1024))
- return False
+ _ThrowError("Meta device too small (%.2fMib)", (bytes / 1024 / 1024))
if bytes > (128 + 32) * 1024 * 1024: # account for an extra (big) PE on LVM
- logging.error("Meta device too big (%.2fMiB)", (bytes / 1024 / 1024))
- return False
- return True
+ _ThrowError("Meta device too big (%.2fMiB)", (bytes / 1024 / 1024))
def Rename(self, new_id):
"""Rename a device.
_MAX_MINORS = 255
_PARSE_SHOW = None
+ # timeout constants
+ _NET_RECONFIG_TIMEOUT = 60
+
def __init__(self, unique_id, children):
if children and children.count(None) > 0:
children = []
self.major = self._DRBD_MAJOR
version = self._GetVersion()
if version['k_major'] != 8 :
- raise errors.BlockDeviceError("Mismatch in DRBD kernel version and"
- " requested ganeti usage: kernel is"
- " %s.%s, ganeti wants 8.x" %
- (version['k_major'], version['k_minor']))
+ _ThrowError("Mismatch in DRBD kernel version and requested ganeti"
+ " usage: kernel is %s.%s, ganeti wants 8.x",
+ version['k_major'], version['k_minor'])
if len(children) not in (0, 2):
raise ValueError("Invalid configuration data %s" % str(children))
result = utils.RunCmd(["drbdmeta", "--force", cls._DevPath(minor),
"v08", dev_path, "0", "create-md"])
if result.failed:
- raise errors.BlockDeviceError("Can't initialize meta device: %s" %
- result.output)
+ _ThrowError("Can't initialize meta device: %s", result.output)
@classmethod
def _FindUnusedMinor(cls):
return highest + 1
@classmethod
- def _IsValidMeta(cls, meta_device):
- """Check if the given meta device looks like a valid one.
-
- """
- minor = cls._FindUnusedMinor()
- minor_path = cls._DevPath(minor)
- result = utils.RunCmd(["drbdmeta", minor_path,
- "v08", meta_device, "0",
- "dstate"])
- if result.failed:
- logging.error("Invalid meta device %s: %s", meta_device, result.output)
- return False
- return True
-
- @classmethod
def _GetShowParser(cls):
"""Return a parser for `drbd show` output.
# value types
value = pyp.Word(pyp.alphanums + '_-/.:')
quoted = dbl_quote + pyp.CharsNotIn('"') + dbl_quote
- addr_port = (pyp.Word(pyp.nums + '.') + pyp.Literal(':').suppress() +
- number)
+ addr_type = (pyp.Optional(pyp.Literal("ipv4")).suppress() +
+ pyp.Optional(pyp.Literal("ipv6")).suppress())
+ addr_port = (addr_type + pyp.Word(pyp.nums + '.') +
+ pyp.Literal(':').suppress() + number)
# meta device, extended syntax
meta_value = ((value ^ quoted) + pyp.Literal('[').suppress() +
number + pyp.Word(']').suppress())
+ # device name, extended syntax
+ device_value = pyp.Literal("minor").suppress() + number
# a statement
stmt = (~rbrace + keyword + ~lbrace +
- pyp.Optional(addr_port ^ value ^ quoted ^ meta_value) +
+ pyp.Optional(addr_port ^ value ^ quoted ^ meta_value ^
+ device_value) +
pyp.Optional(defa) + semi +
pyp.Optional(pyp.restOfLine).suppress())
try:
results = bnf.parseString(out)
except pyp.ParseException, err:
- raise errors.BlockDeviceError("Can't parse drbdsetup show output: %s" %
- str(err))
+ _ThrowError("Can't parse drbdsetup show output: %s", str(err))
# and massage the results into our desired format
for section in results:
def _AssembleLocal(cls, minor, backend, meta):
"""Configure the local part of a DRBD device.
- This is the first thing that must be done on an unconfigured DRBD
- device. And it must be done only once.
-
"""
- if not cls._IsValidMeta(meta):
- return False
args = ["drbdsetup", cls._DevPath(minor), "disk",
backend, meta, "0", "-e", "detach", "--create-device"]
result = utils.RunCmd(args)
if result.failed:
- logging.error("Can't attach local disk: %s", result.output)
- return not result.failed
+ _ThrowError("drbd%d: can't attach local disk: %s", minor, result.output)
@classmethod
def _AssembleNet(cls, minor, net_info, protocol,
if None in net_info:
# we don't want network connection and actually want to make
# sure its shutdown
- return cls._ShutdownNet(minor)
+ cls._ShutdownNet(minor)
+ return
+
+ # Workaround for a race condition. When DRBD is doing its dance to
+ # establish a connection with its peer, it also sends the
+ # synchronization speed over the wire. In some cases setting the
+ # sync speed only after setting up both sides can race with DRBD
+ # connecting, hence we set it here before telling DRBD anything
+ # about its peer.
+ cls._SetMinorSyncSpeed(minor, constants.SYNC_SPEED)
args = ["drbdsetup", cls._DevPath(minor), "net",
"%s:%s" % (lhost, lport), "%s:%s" % (rhost, rport), protocol,
args.extend(["-a", hmac, "-x", secret])
result = utils.RunCmd(args)
if result.failed:
- logging.error("Can't setup network for dbrd device: %s - %s",
- result.fail_reason, result.output)
- return False
+ _ThrowError("drbd%d: can't setup network: %s - %s",
+ minor, result.fail_reason, result.output)
timeout = time.time() + 10
ok = False
ok = True
break
if not ok:
- logging.error("Timeout while configuring network")
- return False
- return True
+ _ThrowError("drbd%d: timeout while configuring network", minor)
def AddChildren(self, devices):
"""Add a disk to the DRBD device.
"""
if self.minor is None:
- raise errors.BlockDeviceError("Can't attach to dbrd8 during AddChildren")
+ _ThrowError("drbd%d: can't attach to dbrd8 during AddChildren",
+ self._aminor)
if len(devices) != 2:
- raise errors.BlockDeviceError("Need two devices for AddChildren")
+ _ThrowError("drbd%d: need two devices for AddChildren", self.minor)
info = self._GetDevInfo(self._GetShowData(self.minor))
if "local_dev" in info:
- raise errors.BlockDeviceError("DRBD8 already attached to a local disk")
+ _ThrowError("drbd%d: already attached to a local disk", self.minor)
backend, meta = devices
if backend.dev_path is None or meta.dev_path is None:
- raise errors.BlockDeviceError("Children not ready during AddChildren")
+ _ThrowError("drbd%d: children not ready during AddChildren", self.minor)
backend.Open()
meta.Open()
- if not self._CheckMetaSize(meta.dev_path):
- raise errors.BlockDeviceError("Invalid meta device size")
+ self._CheckMetaSize(meta.dev_path)
self._InitMeta(self._FindUnusedMinor(), meta.dev_path)
- if not self._IsValidMeta(meta.dev_path):
- raise errors.BlockDeviceError("Cannot initalize meta device")
- if not self._AssembleLocal(self.minor, backend.dev_path, meta.dev_path):
- raise errors.BlockDeviceError("Can't attach to local storage")
+ self._AssembleLocal(self.minor, backend.dev_path, meta.dev_path)
self._children = devices
def RemoveChildren(self, devices):
"""
if self.minor is None:
- raise errors.BlockDeviceError("Can't attach to drbd8 during"
- " RemoveChildren")
+ _ThrowError("drbd%d: can't attach to drbd8 during RemoveChildren",
+ self._aminor)
# early return if we don't actually have backing storage
info = self._GetDevInfo(self._GetShowData(self.minor))
if "local_dev" not in info:
return
if len(self._children) != 2:
- raise errors.BlockDeviceError("We don't have two children: %s" %
- self._children)
+ _ThrowError("drbd%d: we don't have two children: %s", self.minor,
+ self._children)
if self._children.count(None) == 2: # we don't actually have children :)
- logging.error("Requested detach while detached")
+ logging.warning("drbd%d: requested detach while detached", self.minor)
return
if len(devices) != 2:
- raise errors.BlockDeviceError("We need two children in RemoveChildren")
+ _ThrowError("drbd%d: we need two children in RemoveChildren", self.minor)
for child, dev in zip(self._children, devices):
if dev != child.dev_path:
- raise errors.BlockDeviceError("Mismatch in local storage"
- " (%s != %s) in RemoveChildren" %
- (dev, child.dev_path))
+ _ThrowError("drbd%d: mismatch in local storage (%s != %s) in"
+ " RemoveChildren", self.minor, dev, child.dev_path)
- if not self._ShutdownLocal(self.minor):
- raise errors.BlockDeviceError("Can't detach from local storage")
+ self._ShutdownLocal(self.minor)
self._children = []
- def SetSyncSpeed(self, kbytes):
+ @classmethod
+ def _SetMinorSyncSpeed(cls, minor, kbytes):
"""Set the speed of the DRBD syncer.
+ This is the low-level implementation.
+
+ @type minor: int
+ @param minor: the drbd minor whose settings we change
+ @type kbytes: int
+ @param kbytes: the speed in kbytes/second
+ @rtype: boolean
+ @return: the success of the operation
+
"""
- children_result = super(DRBD8, self).SetSyncSpeed(kbytes)
- if self.minor is None:
- logging.info("Instance not attached to a device")
- return False
- result = utils.RunCmd(["drbdsetup", self.dev_path, "syncer", "-r", "%d" %
- kbytes])
+ result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "syncer",
+ "-r", "%d" % kbytes, "--create-device"])
if result.failed:
logging.error("Can't change syncer rate: %s - %s",
result.fail_reason, result.output)
- return not result.failed and children_result
+ return not result.failed
+
+ def SetSyncSpeed(self, kbytes):
+ """Set the speed of the DRBD syncer.
+
+ @type kbytes: int
+ @param kbytes: the speed in kbytes/second
+ @rtype: boolean
+ @return: the success of the operation
+
+ """
+ if self.minor is None:
+ logging.info("Not attached during SetSyncSpeed")
+ return False
+ children_result = super(DRBD8, self).SetSyncSpeed(kbytes)
+ return self._SetMinorSyncSpeed(self.minor, kbytes) and children_result
def GetProcStatus(self):
"""Return device data from /proc.
"""
if self.minor is None:
- raise errors.BlockDeviceError("GetStats() called while not attached")
+ _ThrowError("drbd%d: GetStats() called while not attached", self._aminor)
proc_info = self._MassageProcData(self._GetProcData())
if self.minor not in proc_info:
- raise errors.BlockDeviceError("Can't find myself in /proc (minor %d)" %
- self.minor)
+ _ThrowError("drbd%d: can't find myself in /proc", self.minor)
return DRBD8Status(proc_info[self.minor])
def GetSyncStatus(self):
"""Returns the sync status of the device.
- Returns:
- (sync_percent, estimated_time, is_degraded)
If sync_percent is None, it means all is ok
If estimated_time is None, it means we can't esimate
We compute the ldisk parameter based on wheter we have a local
disk or not.
+ @rtype: tuple
+ @return: (sync_percent, estimated_time, is_degraded, ldisk)
+
"""
if self.minor is None and not self.Attach():
- raise errors.BlockDeviceError("Can't attach to device in GetSyncStatus")
+ _ThrowError("drbd%d: can't Attach() in GetSyncStatus", self._aminor)
stats = self.GetProcStatus()
ldisk = not stats.is_disk_uptodate
is_degraded = not stats.is_connected
cmd.append("-o")
result = utils.RunCmd(cmd)
if result.failed:
- msg = ("Can't make drbd device primary: %s" % result.output)
- logging.error(msg)
- raise errors.BlockDeviceError(msg)
+ _ThrowError("drbd%d: can't make drbd device primary: %s", self.minor,
+ result.output)
def Close(self):
"""Make the local state secondary.
"""
if self.minor is None and not self.Attach():
- logging.info("Instance not attached to a device")
- raise errors.BlockDeviceError("Can't find device")
+ _ThrowError("drbd%d: can't Attach() in Close()", self._aminor)
result = utils.RunCmd(["drbdsetup", self.dev_path, "secondary"])
if result.failed:
- msg = ("Can't switch drbd device to"
- " secondary: %s" % result.output)
- logging.error(msg)
- raise errors.BlockDeviceError(msg)
+ _ThrowError("drbd%d: can't switch drbd device to secondary: %s",
+ self.minor, result.output)
+
+ def DisconnectNet(self):
+ """Removes network configuration.
+
+ This method shuts down the network side of the device.
+
+ The method will wait up to a hardcoded timeout for the device to
+ go into standalone after the 'disconnect' command before
+ re-configuring it, as sometimes it takes a while for the
+ disconnect to actually propagate and thus we might issue a 'net'
+ command while the device is still connected. If the device is
+ still attached to the network when we time out, we raise an
+ exception.
+
+ """
+ if self.minor is None:
+ _ThrowError("drbd%d: disk not attached in re-attach net", self._aminor)
+
+ if None in (self._lhost, self._lport, self._rhost, self._rport):
+ _ThrowError("drbd%d: DRBD disk missing network info in"
+ " DisconnectNet()", self.minor)
+
+ ever_disconnected = _IgnoreError(self._ShutdownNet, self.minor)
+ timeout_limit = time.time() + self._NET_RECONFIG_TIMEOUT
+ sleep_time = 0.100 # we start the retry time at 100 milliseconds
+ while time.time() < timeout_limit:
+ status = self.GetProcStatus()
+ if status.is_standalone:
+ break
+ # retry the disconnect, it seems possible that due to a
+ # well-timed disconnect on the peer, my disconnect command might
+ # be ignored and forgotten
+ ever_disconnected = _IgnoreError(self._ShutdownNet, self.minor) or \
+ ever_disconnected
+ time.sleep(sleep_time)
+ sleep_time = min(2, sleep_time * 1.5)
+
+ if not status.is_standalone:
+ if ever_disconnected:
+ msg = ("drbd%d: device did not react to the"
+ " 'disconnect' command in a timely manner")
+ else:
+ msg = "drbd%d: can't shutdown network, even after multiple retries"
+ _ThrowError(msg, self.minor)
+
+ reconfig_time = time.time() - timeout_limit + self._NET_RECONFIG_TIMEOUT
+ if reconfig_time > 15: # hardcoded alert limit
+ logging.info("drbd%d: DisconnectNet: detach took %.3f seconds",
+ self.minor, reconfig_time)
+
+ def AttachNet(self, multimaster):
+ """Reconnects the network.
+
+ This method connects the network side of the device with a
+ specified multi-master flag. The device needs to be 'Standalone'
+ but have valid network configuration data.
+
+ @type multimaster: boolean
+ @param multimaster: init the network in dual-primary mode
+
+ """
+ if self.minor is None:
+ _ThrowError("drbd%d: device not attached in AttachNet", self._aminor)
+
+ if None in (self._lhost, self._lport, self._rhost, self._rport):
+ _ThrowError("drbd%d: missing network info in AttachNet()", self.minor)
+
+ status = self.GetProcStatus()
+
+ if not status.is_standalone:
+ _ThrowError("drbd%d: device is not standalone in AttachNet", self.minor)
+
+ self._AssembleNet(self.minor,
+ (self._lhost, self._lport, self._rhost, self._rport),
+ constants.DRBD_NET_PROTOCOL, dual_pri=multimaster,
+ hmac=constants.DRBD_HMAC_ALG, secret=self._secret)
def Attach(self):
- """Find a DRBD device which matches our config and attach to it.
+ """Check if our minor is configured.
+
+ This doesn't do any device configurations - it only checks if the
+ minor is in a state different from Unconfigured.
+
+ Note that this function will not change the state of the system in
+ any way (except in case of side-effects caused by reading from
+ /proc).
+
+ """
+ used_devs = self.GetUsedDevs()
+ if self._aminor in used_devs:
+ minor = self._aminor
+ else:
+ minor = None
+
+ self._SetFromMinor(minor)
+ return minor is not None
+
+ def Assemble(self):
+ """Assemble the drbd.
+
+ Method:
+ - if we have a configured device, we try to ensure that it matches
+ our config
+ - if not, we create it from zero
+
+ """
+ super(DRBD8, self).Assemble()
+
+ self.Attach()
+ if self.minor is None:
+ # local device completely unconfigured
+ self._FastAssemble()
+ else:
+ # we have to recheck the local and network status and try to fix
+ # the device
+ self._SlowAssemble()
+
+ def _SlowAssemble(self):
+ """Assembles the DRBD device from a (partially) configured device.
In case of partially attached (local device matches but no network
setup), we perform the network attach. If successful, we re-test
the attach if can return success.
"""
+ net_data = (self._lhost, self._lport, self._rhost, self._rport)
for minor in (self._aminor,):
info = self._GetDevInfo(self._GetShowData(minor))
match_l = self._MatchesLocal(info)
match_r = self._MatchesNet(info)
+
if match_l and match_r:
+ # everything matches
break
+
if match_l and not match_r and "local_addr" not in info:
- res_r = self._AssembleNet(minor,
- (self._lhost, self._lport,
- self._rhost, self._rport),
- "C")
- if res_r:
- if self._MatchesNet(self._GetDevInfo(self._GetShowData(minor))):
- break
- # the weakest case: we find something that is only net attached
- # even though we were passed some children at init time
+ # disk matches, but not attached to network, attach and recheck
+ self._AssembleNet(minor, net_data, constants.DRBD_NET_PROTOCOL,
+ hmac=constants.DRBD_HMAC_ALG, secret=self._secret)
+ if self._MatchesNet(self._GetDevInfo(self._GetShowData(minor))):
+ break
+ else:
+ _ThrowError("drbd%d: network attach successful, but 'drbdsetup"
+ " show' disagrees", minor)
+
if match_r and "local_dev" not in info:
- break
+ # no local disk, but network attached and it matches
+ self._AssembleLocal(minor, self._children[0].dev_path,
+ self._children[1].dev_path)
+ if self._MatchesNet(self._GetDevInfo(self._GetShowData(minor))):
+ break
+ else:
+ _ThrowError("drbd%d: disk attach successful, but 'drbdsetup"
+ " show' disagrees", minor)
# this case must be considered only if we actually have local
# storage, i.e. not in diskless mode, because all diskless
# else, even though its local storage is ours; as we own the
# drbd space, we try to disconnect from the remote peer and
# reconnect to our correct one
- if not self._ShutdownNet(minor):
- raise errors.BlockDeviceError("Device has correct local storage,"
- " wrong remote peer and is unable to"
- " disconnect in order to attach to"
- " the correct peer")
+ try:
+ self._ShutdownNet(minor)
+ except errors.BlockDeviceError, err:
+ _ThrowError("drbd%d: device has correct local storage, wrong"
+ " remote peer and is unable to disconnect in order"
+ " to attach to the correct peer: %s", minor, str(err))
# note: _AssembleNet also handles the case when we don't want
# local storage (i.e. one or more of the _[lr](host|port) is
# None)
- if (self._AssembleNet(minor, (self._lhost, self._lport,
- self._rhost, self._rport), "C") and
- self._MatchesNet(self._GetDevInfo(self._GetShowData(minor)))):
+ self._AssembleNet(minor, net_data, constants.DRBD_NET_PROTOCOL,
+ hmac=constants.DRBD_HMAC_ALG, secret=self._secret)
+ if self._MatchesNet(self._GetDevInfo(self._GetShowData(minor))):
break
+ else:
+ _ThrowError("drbd%d: network attach successful, but 'drbdsetup"
+ " show' disagrees", minor)
else:
minor = None
self._SetFromMinor(minor)
- return minor is not None
+ if minor is None:
+ _ThrowError("drbd%d: cannot activate, unknown or unhandled reason",
+ self._aminor)
- def Assemble(self):
- """Assemble the drbd.
+ def _FastAssemble(self):
+ """Assemble the drbd device from zero.
- Method:
- - if we have a local backing device, we bind to it by:
- - checking the list of used drbd devices
- - check if the local minor use of any of them is our own device
- - if yes, abort?
- - if not, bind
- - if we have a local/remote net info:
- - redo the local backing device step for the remote device
- - check if any drbd device is using the local port,
- if yes abort
- - check if any remote drbd device is using the remote
- port, if yes abort (for now)
- - bind our net port
- - bind the remote net port
+ This is run when in Assemble we detect our minor is unused.
"""
- self.Attach()
- if self.minor is not None:
- logging.info("Already assembled")
- return True
-
- result = super(DRBD8, self).Assemble()
- if not result:
- return result
-
- # TODO: maybe completely tear-down the minor (drbdsetup ... down)
- # before attaching our own?
minor = self._aminor
- need_localdev_teardown = False
if self._children and self._children[0] and self._children[1]:
- result = self._AssembleLocal(minor, self._children[0].dev_path,
- self._children[1].dev_path)
- if not result:
- return False
- need_localdev_teardown = True
+ self._AssembleLocal(minor, self._children[0].dev_path,
+ self._children[1].dev_path)
if self._lhost and self._lport and self._rhost and self._rport:
- result = self._AssembleNet(minor,
- (self._lhost, self._lport,
- self._rhost, self._rport),
- "C")
- if not result:
- if need_localdev_teardown:
- # we will ignore failures from this
- logging.error("net setup failed, tearing down local device")
- self._ShutdownAll(minor)
- return False
+ self._AssembleNet(minor,
+ (self._lhost, self._lport, self._rhost, self._rport),
+ constants.DRBD_NET_PROTOCOL,
+ hmac=constants.DRBD_HMAC_ALG, secret=self._secret)
self._SetFromMinor(minor)
- return True
@classmethod
def _ShutdownLocal(cls, minor):
"""
result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "detach"])
if result.failed:
- logging.error("Can't detach local device: %s", result.output)
- return not result.failed
+ _ThrowError("drbd%d: can't detach local disk: %s", minor, result.output)
@classmethod
def _ShutdownNet(cls, minor):
"""
result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "disconnect"])
if result.failed:
- logging.error("Can't shutdown network: %s", result.output)
- return not result.failed
+ _ThrowError("drbd%d: can't shutdown network: %s", minor, result.output)
@classmethod
def _ShutdownAll(cls, minor):
"""
result = utils.RunCmd(["drbdsetup", cls._DevPath(minor), "down"])
if result.failed:
- logging.error("Can't shutdown drbd device: %s", result.output)
- return not result.failed
+ _ThrowError("drbd%d: can't shutdown drbd device: %s",
+ minor, result.output)
def Shutdown(self):
"""Shutdown the DRBD device.
"""
if self.minor is None and not self.Attach():
- logging.info("DRBD device not attached to a device during Shutdown")
- return True
- if not self._ShutdownAll(self.minor):
- return False
+ logging.info("drbd%d: not attached during Shutdown()", self._aminor)
+ return
+ minor = self.minor
self.minor = None
self.dev_path = None
- return True
+ self._ShutdownAll(minor)
def Remove(self):
"""Stub remove for DRBD devices.
"""
- return self.Shutdown()
+ self.Shutdown()
@classmethod
def Create(cls, unique_id, children, size):
"""
if len(children) != 2:
raise errors.ProgrammerError("Invalid setup for the drbd device")
+ # check that the minor is unused
+ aminor = unique_id[4]
+ proc_info = cls._MassageProcData(cls._GetProcData())
+ if aminor in proc_info:
+ status = DRBD8Status(proc_info[aminor])
+ in_use = status.is_in_use
+ else:
+ in_use = False
+ if in_use:
+ _ThrowError("drbd%d: minor is already in use at Create() time", aminor)
meta = children[1]
meta.Assemble()
if not meta.Attach():
- raise errors.BlockDeviceError("Can't attach to meta device")
- if not cls._CheckMetaSize(meta.dev_path):
- raise errors.BlockDeviceError("Invalid meta device size")
- cls._InitMeta(cls._FindUnusedMinor(), meta.dev_path)
- if not cls._IsValidMeta(meta.dev_path):
- raise errors.BlockDeviceError("Cannot initalize meta device")
+ _ThrowError("drbd%d: can't attach to meta device '%s'",
+ aminor, meta)
+ cls._CheckMetaSize(meta.dev_path)
+ cls._InitMeta(aminor, meta.dev_path)
return cls(unique_id, children)
def Grow(self, amount):
"""
if self.minor is None:
- raise errors.ProgrammerError("drbd8: Grow called while not attached")
+ _ThrowError("drbd%d: Grow called while not attached", self._aminor)
if len(self._children) != 2 or None in self._children:
- raise errors.BlockDeviceError("Cannot grow diskless DRBD8 device")
+ _ThrowError("drbd%d: cannot grow diskless device", self.minor)
self._children[0].Grow(amount)
result = utils.RunCmd(["drbdsetup", self.dev_path, "resize"])
if result.failed:
- raise errors.BlockDeviceError("resize failed for %s: %s" %
- (self.dev_path, result.output))
- return
+ _ThrowError("drbd%d: resize failed: %s", self.minor, result.output)
class FileStorage(BlockDev):
raise ValueError("Invalid configuration data %s" % str(unique_id))
self.driver = unique_id[0]
self.dev_path = unique_id[1]
+ self.Attach()
def Assemble(self):
"""Assemble the device.
+ @raise errors.BlockDeviceError: if the backing file does not exist
"""
if not os.path.exists(self.dev_path):
- raise errors.BlockDeviceError("File device '%s' does not exist." %
- self.dev_path)
- return True
+ _ThrowError("File device '%s' does not exist", self.dev_path)
def Shutdown(self):
"""Shutdown the device.
the file on shutdown.
"""
- return True
+ pass
def Open(self, force=False):
"""Make the device ready for I/O.
def Remove(self):
"""Remove the file backing the block device.
- Returns:
- boolean indicating wheter removal of file was successful or not.
+ @raise errors.BlockDeviceError: if the removal fails for any reason
+ other than the file not existing
"""
- if not os.path.exists(self.dev_path):
- return True
try:
os.remove(self.dev_path)
- return True
except OSError, err:
- logging.error("Can't remove file '%s': %s", self.dev_path, err)
- return False
+ if err.errno != errno.ENOENT:
+ _ThrowError("Can't remove file '%s': %s", self.dev_path, err)
def Attach(self):
"""Attach to an existing file.
Check if this file already exists.
- Returns:
- boolean indicating if file exists or not.
+ @rtype: boolean
+ @return: True if file exists
"""
- if os.path.exists(self.dev_path):
- return True
- return False
+ self.attached = os.path.exists(self.dev_path)
+ return self.attached
@classmethod
def Create(cls, unique_id, children, size):
"""Create a new file.
- Args:
- children:
- size: integer size of file in MiB
+ @param size: the size of file in MiB
- Returns:
- A ganeti.bdev.FileStorage object.
+ @rtype: L{bdev.FileStorage}
+ @return: an instance of FileStorage
+ @raise errors.BlockDeviceError: if the file already exists or cannot
+ be created
"""
if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2:
raise ValueError("Invalid configuration data %s" % str(unique_id))
dev_path = unique_id[1]
+ if os.path.exists(dev_path):
+ _ThrowError("File already existing: %s", dev_path)
try:
f = open(dev_path, 'w')
- except IOError, err:
- raise errors.BlockDeviceError("Could not create '%'" % err)
- else:
f.truncate(size * 1024 * 1024)
f.close()
+ except IOError, err:
+ _ThrowError("Error in file creation: %s", str(err))
return FileStorage(unique_id, children)
device = DEV_MAP[dev_type](unique_id, children)
if not device.attached:
return None
- return device
+ return device
-def AttachOrAssemble(dev_type, unique_id, children):
+def Assemble(dev_type, unique_id, children):
"""Try to attach or assemble an existing device.
- This will attach to an existing assembled device or will assemble
- the device, as needed, to bring it fully up.
+ This will assemble the device, as needed, to bring it fully up.
+ It must be safe to run on already-assembled devices.
"""
if dev_type not in DEV_MAP:
raise errors.ProgrammerError("Invalid block device type '%s'" % dev_type)
device = DEV_MAP[dev_type](unique_id, children)
- if not device.attached:
- device.Assemble()
- if not device.attached:
- raise errors.BlockDeviceError("Can't find a valid block device for"
- " %s/%s/%s" %
- (dev_type, unique_id, children))
+ device.Assemble()
return device