From: Apollon Oikonomopoulos Date: Fri, 4 Mar 2011 14:35:23 +0000 (+0200) Subject: Shared block storage support X-Git-Tag: v2.5.0beta1~561 X-Git-Url: https://code.grnet.gr/git/ganeti-local/commitdiff_plain/b6135bbc19e9b9fcfc621232cc25cff1b20eb4f1 Shared block storage support This patch introduces basic shared block storage support. It introduces a new storage backend, bdev.PersistentBlockDevice, to use as a backend for shared block storage. The new bdev requires a new BLOCKDEV_DRIVER_MANUAL constant with the value "manual" and uses it as the first part of the block device unique_id. A new disk template, DT_BLOCK is introduced as well and added to DTS_EXT_MIRROR and DTS_MAY_ADOPT. Also added DTS_MUST_ADOPT constant and use it to check for the presence of the adopt keyword during LU invocation. We enforce the /dev/disk limitation upon adoption, but we allow block devices to reside anywhere under /dev. This is very basic support and includes no storage manipulation (provisioning, resizing, renaming) which will have to be implemented through a "driver" framework. Signed-off-by: Apollon Oikonomopoulos [iustin@google.com: slight changes to bdev.py] Signed-off-by: Iustin Pop Reviewed-by: Iustin Pop --- diff --git a/lib/bdev.py b/lib/bdev.py index 69797bc..2430b1d 100644 --- a/lib/bdev.py +++ b/lib/bdev.py @@ -24,6 +24,7 @@ import re import time import errno +import stat import pyparsing as pyp import os import logging @@ -2069,9 +2070,120 @@ class FileStorage(BlockDev): return FileStorage(unique_id, children, size) +class PersistentBlockDevice(BlockDev): + """A block device with persistent node + + May be either directly attached, or exposed through DM (e.g. dm-multipath). + udev helpers are probably required to give persistent, human-friendly + names. + + For the time being, pathnames are required to lie under /dev. + + """ + def __init__(self, unique_id, children, size): + """Attaches to a static block device. + + The unique_id is a path under /dev. + + """ + super(PersistentBlockDevice, self).__init__(unique_id, children, size) + if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2: + raise ValueError("Invalid configuration data %s" % str(unique_id)) + self.dev_path = unique_id[1] + if not os.path.realpath(self.dev_path).startswith('/dev/'): + raise ValueError("Full path '%s' lies outside /dev" % + os.path.realpath(self.dev_path)) + # TODO: this is just a safety guard checking that we only deal with devices + # we know how to handle. In the future this will be integrated with + # external storage backends and possible values will probably be collected + # from the cluster configuration. + if unique_id[0] != constants.BLOCKDEV_DRIVER_MANUAL: + raise ValueError("Got persistent block device of invalid type: %s" % + unique_id[0]) + + self.major = self.minor = None + self.Attach() + + @classmethod + def Create(cls, unique_id, children, size): + """Create a new device + + This is a noop, we only return a PersistentBlockDevice instance + + """ + return PersistentBlockDevice(unique_id, children, 0) + + def Remove(self): + """Remove a device + + This is a noop + + """ + pass + + def Rename(self, new_id): + """Rename this device. + + """ + _ThrowError("Rename is not supported for PersistentBlockDev storage") + + def Attach(self): + """Attach to an existing block device. + + + """ + self.attached = False + try: + st = os.stat(self.dev_path) + except OSError, err: + logging.error("Error stat()'ing %s: %s", self.dev_path, str(err)) + return False + + if not stat.S_ISBLK(st.st_mode): + logging.error("%s is not a block device", self.dev_path) + return False + + self.major = os.major(st.st_rdev) + self.minor = os.minor(st.st_rdev) + self.attached = True + + return True + + def Assemble(self): + """Assemble the device. + + """ + pass + + def Shutdown(self): + """Shutdown the device. + + """ + pass + + def Open(self, force=False): + """Make the device ready for I/O. + + """ + pass + + def Close(self): + """Notifies that the device will no longer be used for I/O. + + """ + pass + + def Grow(self, amount): + """Grow the logical volume. + + """ + _ThrowError("Grow is not supported for PersistentBlockDev storage") + + DEV_MAP = { constants.LD_LV: LogicalVolume, constants.LD_DRBD8: DRBD8, + constants.LD_BLOCKDEV: PersistentBlockDevice, } if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE: diff --git a/lib/cmdlib.py b/lib/cmdlib.py index 29ed69a..be8e70c 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -6661,6 +6661,19 @@ def _GenerateDiskTemplate(lu, template_name, disk_index)), mode=disk["mode"]) disks.append(disk_dev) + elif template_name == constants.DT_BLOCK: + if len(secondary_nodes) != 0: + raise errors.ProgrammerError("Wrong template configuration") + + for idx, disk in enumerate(disk_info): + disk_index = idx + base_index + disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV, size=disk["size"], + logical_id=(constants.BLOCKDEV_DRIVER_MANUAL, + disk["adopt"]), + iv_name="disk/%d" % disk_index, + mode=disk["mode"]) + disks.append(disk_dev) + else: raise errors.ProgrammerError("Invalid disk template '%s'" % template_name) return disks @@ -6887,6 +6900,7 @@ def _ComputeDiskSize(disk_template, disks): constants.DT_DRBD8: sum(d["size"] + 128 for d in disks), constants.DT_FILE: None, constants.DT_SHARED_FILE: 0, + constants.DT_BLOCK: 0, } if disk_template not in req_size_dict: @@ -7022,6 +7036,12 @@ class LUInstanceCreate(LogicalUnit): if self.op.mode == constants.INSTANCE_IMPORT: raise errors.OpPrereqError("Disk adoption not allowed for" " instance import", errors.ECODE_INVAL) + else: + if self.op.disk_template in constants.DTS_MUST_ADOPT: + raise errors.OpPrereqError("Disk template %s requires disk adoption," + " but no 'adopt' parameter given" % + self.op.disk_template, + errors.ECODE_INVAL) self.adopt_disks = has_adopt @@ -7614,7 +7634,7 @@ class LUInstanceCreate(LogicalUnit): req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks) _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes) - else: # instead, we must check the adoption data + elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks]) if len(all_lvs) != len(self.disks): raise errors.OpPrereqError("Duplicate volume names given for adoption", @@ -7650,6 +7670,34 @@ class LUInstanceCreate(LogicalUnit): for dsk in self.disks: dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0])) + elif self.op.disk_template == constants.DT_BLOCK: + # Normalize and de-duplicate device paths + all_disks = set([os.path.abspath(i["adopt"]) for i in self.disks]) + if len(all_disks) != len(self.disks): + raise errors.OpPrereqError("Duplicate disk names given for adoption", + errors.ECODE_INVAL) + baddisks = [d for d in all_disks + if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)] + if baddisks: + raise errors.OpPrereqError("Device node(s) %s lie outside %s and" + " cannot be adopted" % + (", ".join(baddisks), + constants.ADOPTABLE_BLOCKDEV_ROOT), + errors.ECODE_INVAL) + + node_disks = self.rpc.call_bdev_sizes([pnode.name], + list(all_disks))[pnode.name] + node_disks.Raise("Cannot get block device information from node %s" % + pnode.name) + node_disks = node_disks.payload + delta = all_disks.difference(node_disks.keys()) + if delta: + raise errors.OpPrereqError("Missing block device(s): %s" % + utils.CommaJoin(delta), + errors.ECODE_INVAL) + for dsk in self.disks: + dsk["size"] = int(float(node_disks[dsk["adopt"]])) + _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams) _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant) @@ -7721,17 +7769,18 @@ class LUInstanceCreate(LogicalUnit): ) if self.adopt_disks: - # rename LVs to the newly-generated names; we need to construct - # 'fake' LV disks with the old data, plus the new unique_id - tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks] - rename_to = [] - for t_dsk, a_dsk in zip (tmp_disks, self.disks): - rename_to.append(t_dsk.logical_id) - t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"]) - self.cfg.SetDiskID(t_dsk, pnode_name) - result = self.rpc.call_blockdev_rename(pnode_name, - zip(tmp_disks, rename_to)) - result.Raise("Failed to rename adoped LVs") + if self.op.disk_template == constants.DT_PLAIN: + # rename LVs to the newly-generated names; we need to construct + # 'fake' LV disks with the old data, plus the new unique_id + tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks] + rename_to = [] + for t_dsk, a_dsk in zip (tmp_disks, self.disks): + rename_to.append(t_dsk.logical_id) + t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"]) + self.cfg.SetDiskID(t_dsk, pnode_name) + result = self.rpc.call_blockdev_rename(pnode_name, + zip(tmp_disks, rename_to)) + result.Raise("Failed to rename adoped LVs") else: feedback_fn("* creating instance disks...") try: diff --git a/lib/constants.py b/lib/constants.py index 033de03..816ad17 100644 --- a/lib/constants.py +++ b/lib/constants.py @@ -120,6 +120,7 @@ CRYPTO_KEYS_DIR = RUN_GANETI_DIR + "/crypto" CRYPTO_KEYS_DIR_MODE = SECURE_DIR_MODE IMPORT_EXPORT_DIR = RUN_GANETI_DIR + "/import-export" IMPORT_EXPORT_DIR_MODE = 0755 +ADOPTABLE_BLOCKDEV_ROOT = "/dev/disk/" # keep RUN_GANETI_DIR first here, to make sure all get created when the node # daemon is started (this takes care of RUN_DIR being tmpfs) SUB_RUN_DIRS = [ RUN_GANETI_DIR, BDEV_CACHE_DIR, DISK_LINKS_DIR ] @@ -363,21 +364,25 @@ DT_PLAIN = "plain" DT_DRBD8 = "drbd" DT_FILE = "file" DT_SHARED_FILE = "sharedfile" +DT_BLOCK = "blockdev" # the set of network-mirrored disk templates DTS_NET_MIRROR = frozenset([DT_DRBD8]) -# the set of externally mirrored disk templates -DTS_EXT_MIRROR = frozenset([DT_SHARED_FILE]) +# the set of externally-mirrored disk templates (e.g. SAN, NAS) +DTS_EXT_MIRROR = frozenset([DT_SHARED_FILE, DT_BLOCK]) # the set of non-lvm-based disk templates -DTS_NOT_LVM = frozenset([DT_DISKLESS, DT_FILE, DT_SHARED_FILE]) +DTS_NOT_LVM = frozenset([DT_DISKLESS, DT_FILE, DT_SHARED_FILE, DT_BLOCK]) # the set of disk templates which can be grown DTS_GROWABLE = frozenset([DT_PLAIN, DT_DRBD8, DT_FILE, DT_SHARED_FILE]) # the set of disk templates that allow adoption -DTS_MAY_ADOPT = frozenset([DT_PLAIN]) +DTS_MAY_ADOPT = frozenset([DT_PLAIN, DT_BLOCK]) + +# the set of disk templates that *must* use adoption +DTS_MUST_ADOPT = frozenset([DT_BLOCK]) # the set of disk templates that allow migrations DTS_MIRRORED = frozenset.union(DTS_NET_MIRROR, DTS_EXT_MIRROR) @@ -387,7 +392,8 @@ DTS_MIRRORED = frozenset.union(DTS_NET_MIRROR, DTS_EXT_MIRROR) LD_LV = "lvm" LD_DRBD8 = "drbd8" LD_FILE = "file" -LDS_BLOCK = frozenset([LD_LV, LD_DRBD8]) +LD_BLOCKDEV = "blockdev" +LDS_BLOCK = frozenset([LD_LV, LD_DRBD8, LD_BLOCKDEV]) # drbd constants DRBD_HMAC_ALG = "md5" @@ -460,7 +466,7 @@ RIE_CONNECT_RETRIES = 10 CHILD_LINGER_TIMEOUT = 5.0 DISK_TEMPLATES = frozenset([DT_DISKLESS, DT_PLAIN, DT_DRBD8, - DT_FILE, DT_SHARED_FILE]) + DT_FILE, DT_SHARED_FILE, DT_BLOCK]) FILE_DRIVER = frozenset([FD_LOOP, FD_BLKTAP]) @@ -1312,3 +1318,6 @@ VALID_ALLOC_POLICIES = [ ALLOC_POLICY_LAST_RESORT, ALLOC_POLICY_UNALLOCABLE, ] + +# Temporary external/shared storage parameters +BLOCKDEV_DRIVER_MANUAL = "manual" diff --git a/lib/objects.py b/lib/objects.py index ef99349..fb7e323 100644 --- a/lib/objects.py +++ b/lib/objects.py @@ -441,6 +441,8 @@ class Disk(ConfigObject): """ if self.dev_type == constants.LD_LV: return "/dev/%s/%s" % (self.logical_id[0], self.logical_id[1]) + elif self.dev_type == constants.LD_BLOCKDEV: + return self.logical_id[1] return None def ChildrenNeeded(self): @@ -483,7 +485,8 @@ class Disk(ConfigObject): devices needs to (or can) be assembled. """ - if self.dev_type in [constants.LD_LV, constants.LD_FILE]: + if self.dev_type in [constants.LD_LV, constants.LD_FILE, + constants.LD_BLOCKDEV]: result = [node] elif self.dev_type in constants.LDS_DRBD: result = [self.logical_id[0], self.logical_id[1]]