X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/9cdb95781e44c99134f20a331f744c995ff5a309..43c16a8a1adfd543751fcaf60ad4c8e04cf83688:/tools/lvmstrap diff --git a/tools/lvmstrap b/tools/lvmstrap index 223f6fd..6512477 100755 --- a/tools/lvmstrap +++ b/tools/lvmstrap @@ -1,7 +1,7 @@ #!/usr/bin/python # -# Copyright (C) 2006, 2007 Google Inc. +# Copyright (C) 2006, 2007, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -36,6 +36,7 @@ OSError. The idea behind this is, since we run as root, we should usually not get these errors, but if we do it's most probably a system error, so they should be handled and the user instructed to report them. + """ @@ -43,18 +44,49 @@ import os import sys import optparse import time +import errno +import re from ganeti.utils import RunCmd, ReadFile from ganeti import constants from ganeti import cli +from ganeti import compat USAGE = ("\tlvmstrap diskinfo\n" - "\tlvmstrap [--vgname=NAME] [--allow-removable]" - " { --alldisks | --disks DISKLIST }" + "\tlvmstrap [--vg-name=NAME] [--allow-removable]" + " { --alldisks | --disks DISKLIST } [--use-sfdisk]" " create") verbose_flag = False +#: Supported disk types (as prefixes) +SUPPORTED_TYPES = [ + "hd", + "sd", + "md", + "ubd", + ] + +#: Excluded filesystem types +EXCLUDED_FS = frozenset([ + "nfs", + "nfs4", + "autofs", + "tmpfs", + "proc", + "sysfs", + "usbfs", + "devpts", + ]) + +#: A regular expression that matches partitions (must be kept in sync +# with L{SUPPORTED_TYPES} +PART_RE = re.compile("^((?:h|s|m|ub)d[a-z]{1,2})[0-9]+$") + +#: Minimum partition size to be considered (1 GB) +PART_MINSIZE = 1024 * 1024 * 1024 +MBR_MAX_SIZE = 2 * (10 ** 12) + class Error(Exception): """Generic exception""" @@ -66,6 +98,7 @@ class ProgrammingError(Error): This should catch sysfs tree changes, or otherwise incorrect assumptions about the contents of the /sys/block/... directories. + """ pass @@ -79,6 +112,7 @@ class SysconfigError(Error): This should usually mean that the installation of the Xen node failed in some steps. + """ pass @@ -92,6 +126,7 @@ class PrereqError(Error): This should usually mean that the build steps for the Xen node were not followed correctly. + """ pass @@ -100,6 +135,7 @@ class OperationalError(Error): """Exception denoting actual errors. Errors during the bootstrapping are signaled using this exception. + """ pass @@ -109,13 +145,15 @@ class ParameterError(Error): Wrong disks given as parameters will be signaled using this exception. + """ pass def Usage(): - """Shows program usage information and exits the program.""" + """Shows program usage information and exits the program. + """ print >> sys.stderr, "Usage:" print >> sys.stderr, USAGE sys.exit(2) @@ -127,10 +165,12 @@ def ParseOptions(): In case of command line errors, it will show the usage and exit the program. - Returns: - (options, args), as returned by OptionParser.parse_args + @rtype: tuple + @return: a tuple of (options, args), as returned by + OptionParser.parse_args + """ - global verbose_flag + global verbose_flag # pylint: disable=W0603 parser = optparse.OptionParser(usage="\n%s" % USAGE, version="%%prog (ganeti) %s" % @@ -149,7 +189,9 @@ def ParseOptions(): parser.add_option("-g", "--vg-name", type="string", dest="vgname", default="xenvg", metavar="NAME", help="the volume group to be created [default: xenvg]") - + parser.add_option("--use-sfdisk", dest="use_sfdisk", + action="store_true", default=False, + help="use sfdisk instead of parted") options, args = parser.parse_args() if len(args) != 1: @@ -160,6 +202,40 @@ def ParseOptions(): return options, args +def IsPartitioned(disk): + """Returns whether a given disk should be used partitioned or as-is. + + Currently only md devices are used as is. + + """ + return not (disk.startswith("md") or PART_RE.match(disk)) + + +def DeviceName(disk): + """Returns the appropriate device name for a disk. + + For non-partitioned devices, it returns the name as is, otherwise it + returns the first partition. + + """ + if IsPartitioned(disk): + device = "/dev/%s1" % disk + else: + device = "/dev/%s" % disk + return device + + +def SysfsName(disk): + """Returns the sysfs name for a disk or partition. + + """ + match = PART_RE.match(disk) + if match: + # this is a partition, which resides in /sys/block under a different name + disk = "%s/%s" % (match.group(1), disk) + return "/sys/block/%s" % disk + + def ExecCommand(command): """Executes a command. @@ -167,13 +243,12 @@ def ExecCommand(command): difference that if the command line argument -v has been given, it will print the command line and the command output on stdout. - Args: - the command line - Returns: - (status, output) where status is the exit status and output the - stdout and stderr of the command together - """ + @param command: the command line to be executed + @rtype: tuple + @return: a tuple of (status, output) where status is the exit status + and output the stdout and stderr of the command together + """ if verbose_flag: print command result = RunCmd(command) @@ -187,19 +262,19 @@ def CheckPrereq(): It check that it runs on Linux 2.6, and that /sys is mounted and the fact that /sys/block is a directory. - """ + """ if os.getuid() != 0: raise PrereqError("This tool runs as root only. Really.") - osname, nodename, release, version, arch = os.uname() - if osname != 'Linux': + osname, _, release, _, _ = os.uname() + if osname != "Linux": raise PrereqError("This tool only runs on Linux" " (detected OS: %s)." % osname) - if not release.startswith("2.6."): + if not (release.startswith("2.6.") or release.startswith("3.")): raise PrereqError("Wrong major kernel version (detected %s, needs" - " 2.6.*)" % release) + " 2.6.* or 3.*)" % release) if not os.path.ismount("/sys"): raise PrereqError("Can't find a filesystem mounted at /sys." @@ -220,18 +295,16 @@ def CheckPrereq(): def CheckVGExists(vgname): """Checks to see if a volume group exists. - Args: - vgname: the volume group name + @param vgname: the volume group name - Returns: - a four-tuple (exists, lv_count, vg_size, vg_free), where: - exists: True if the volume exists, otherwise False; if False, + @return: a four-tuple (exists, lv_count, vg_size, vg_free), where: + - exists: True if the volume exists, otherwise False; if False, all other members of the tuple are None - lv_count: The number of logical volumes in the volume group - vg_size: The total size of the volume group (in gibibytes) - vg_free: The available space in the volume group - """ + - lv_count: The number of logical volumes in the volume group + - vg_size: The total size of the volume group (in gibibytes) + - vg_free: The available space in the volume group + """ result = ExecCommand("vgs --nohead -o lv_count,vg_size,vg_free" " --nosuffix --units g" " --ignorelockingfailure %s" % vgname) @@ -261,17 +334,13 @@ def CheckSysDev(name, devnum): some retries here. Since we only do a stat, we can afford to do many short retries. - Args: - name: the device name, e.g. 'sda' - devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3 + @param name: the device name, e.g. 'sda' + @param devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3 + @raises L{SysconfigError}: in case of failure of the check - Returns: - None; failure of the check is signaled by raising a - SysconfigError exception """ - path = "/dev/%s" % name - for retries in range(40): + for _ in range(40): if os.path.exists(path): break time.sleep(0.250) @@ -293,13 +362,13 @@ def ReadDev(syspath): function reads that file and converts the major:minor pair to a dev number. - Args: - syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda + @type syspath: string + @param syspath: the path to a block device dir in sysfs, + e.g. C{/sys/block/sda} - Returns: - the device number - """ + @return: the device number + """ if not os.path.exists("%s/dev" % syspath): raise ProgrammingError("Invalid path passed to ReadDev: %s" % syspath) f = open("%s/dev" % syspath) @@ -320,11 +389,13 @@ def ReadSize(syspath): function reads that file and converts the number in sectors to the size in bytes. - Args: - syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda + @type syspath: string + @param syspath: the path to a block device dir in sysfs, + e.g. C{/sys/block/sda} + + @rtype: int + @return: the device size in bytes - Returns: - the device size in bytes """ if not os.path.exists("%s/size" % syspath): @@ -341,14 +412,13 @@ def ReadPV(name): This function tries to see if a block device is a physical volume. - Args: - dev: the device name (e.g. sda) - Returns: - The name of the volume group to which this PV belongs, or - "" if this PV is not in use, or - None if this is not a PV - """ + @type name: string + @param name: the device name (e.g. sda) + + @return: the name of the volume group to which this PV belongs, or + "" if this PV is not in use, or None if this is not a PV + """ result = ExecCommand("pvdisplay -c /dev/%s" % name) if result.failed: return None @@ -362,28 +432,25 @@ def GetDiskList(opts): This function examines the /sys/block tree and using information therein, computes the status of the block device. - Returns: - [(name, size, dev, partitions, inuse), ...] - where: - name is the block device name (e.g. sda) - size the size in bytes - dev the device number (e.g. 8704 for hdg) - partitions is [(name, size, dev), ...] mirroring the disk list data - inuse is a boolean showing the in-use status of the disk, computed as the - possibility of re-reading the partition table (the meaning of the - operation varies with the kernel version, but is usually accurate; - a mounted disk/partition or swap-area or PV with active LVs on it - is busy) - """ + @return: a list like [(name, size, dev, partitions, inuse), ...], where: + - name is the block device name (e.g. sda) + - size the size in bytes + - dev is the device number (e.g. 8704 for hdg) + - partitions is [(name, size, dev), ...] mirroring the disk list + data inuse is a boolean showing the in-use status of the disk, + computed as the possibility of re-reading the partition table + (the meaning of the operation varies with the kernel version, + but is usually accurate; a mounted disk/partition or swap-area + or PV with active LVs on it is busy) + """ dlist = [] for name in os.listdir("/sys/block"): - if (not name.startswith("hd") and - not name.startswith("sd") and - not name.startswith("ubd")): + if not compat.any([name.startswith(pfx) for pfx in SUPPORTED_TYPES]): continue - size = ReadSize("/sys/block/%s" % name) + disksysfsname = "/sys/block/%s" % name + size = ReadSize(disksysfsname) f = open("/sys/block/%s/removable" % name) removable = int(f.read().strip()) @@ -392,18 +459,21 @@ def GetDiskList(opts): if removable and not opts.removable_ok: continue - dev = ReadDev("/sys/block/%s" % name) + dev = ReadDev(disksysfsname) CheckSysDev(name, dev) - inuse = not CheckReread(name) + inuse = InUse(name) # Enumerate partitions of the block device partitions = [] - for partname in os.listdir("/sys/block/%s" % name): + for partname in os.listdir(disksysfsname): if not partname.startswith(name): continue - partdev = ReadDev("/sys/block/%s/%s" % (name, partname)) - partsize = ReadSize("/sys/block/%s/%s" % (name, partname)) - CheckSysDev(partname, partdev) - partitions.append((partname, partsize, partdev)) + partsysfsname = "%s/%s" % (disksysfsname, partname) + partdev = ReadDev(partsysfsname) + partsize = ReadSize(partsysfsname) + if partsize >= PART_MINSIZE: + CheckSysDev(partname, partdev) + partinuse = InUse(partname) + partitions.append((partname, partsize, partdev, partinuse)) partitions.sort() dlist.append((name, size, dev, partitions, inuse)) dlist.sort() @@ -419,16 +489,16 @@ def GetMountInfo(): of the results is memorised for later matching against the /sys/block devices. - Returns: - a mountpoint: device number dictionary - """ + @rtype: dict + @return: a {mountpoint: device number} dictionary + """ mountlines = ReadFile("/proc/mounts").splitlines() mounts = {} for line in mountlines: - device, mountpoint, fstype, rest = line.split(None, 3) + _, mountpoint, fstype, _ = line.split(None, 3) # fs type blacklist - if fstype in ["nfs", "nfs4", "autofs", "tmpfs", "proc", "sysfs"]: + if fstype in EXCLUDED_FS: continue try: dev = os.stat(mountpoint).st_dev @@ -445,19 +515,26 @@ def GetMountInfo(): return mounts +def GetSwapInfo(): + """Reads /proc/swaps and returns the list of swap backing stores. + + """ + swaplines = ReadFile("/proc/swaps").splitlines()[1:] + return [line.split(None, 1)[0] for line in swaplines] + + def DevInfo(name, dev, mountinfo): """Computes miscellaneous information about a block device. - Args: - name: the device name, e.g. sda + @type name: string + @param name: the device name, e.g. sda - Returns: - (mpath, whatvg, fileinfo), where - mpath is the mount path where this device is mounted or None - whatvg is the result of the ReadPV function - fileinfo is the output of file -bs on the device - """ + @return: a tuple (mpath, whatvg, fileinfo), where: + - mpath is the mount path where this device is mounted or None + - whatvg is the result of the ReadPV function + - fileinfo is the output of file -bs on the device + """ if dev in mountinfo: mpath = mountinfo[dev] else: @@ -480,6 +557,12 @@ def ShowDiskInfo(opts): choice about which disks should be allocated to our volume group. """ + def _inuse(inuse): + if inuse: + return "yes" + else: + return "no" + mounts = GetMountInfo() dlist = GetDiskList(opts) @@ -497,13 +580,9 @@ def ShowDiskInfo(opts): flatlist = [] # Flatten the [(disk, [partition,...]), ...] list for name, size, dev, parts, inuse in dlist: - if inuse: - str_inuse = "yes" - else: - str_inuse = "no" - flatlist.append((name, size, dev, str_inuse)) - for partname, partsize, partdev in parts: - flatlist.append((partname, partsize, partdev, "")) + flatlist.append((name, size, dev, _inuse(inuse))) + for partname, partsize, partdev, partinuse in parts: + flatlist.append((partname, partsize, partdev, _inuse(partinuse))) strlist = [] for name, size, dev, in_use in flatlist: @@ -531,24 +610,83 @@ def ShowDiskInfo(opts): print line +def CheckSysfsHolders(name): + """Check to see if a device is 'hold' at sysfs level. + + This is usually the case for Physical Volumes under LVM. + + @rtype: boolean + @return: true if the device is available according to sysfs + + """ + try: + contents = os.listdir("%s/holders/" % SysfsName(name)) + except OSError, err: + if err.errno == errno.ENOENT: + contents = [] + else: + raise + return not bool(contents) + + def CheckReread(name): """Check to see if a block device is in use. - Uses blockdev to reread the partition table of a block device, and - thus compute the in-use status. See the discussion in GetDiskList - about the meaning of 'in use'. + Uses blockdev to reread the partition table of a block device (or + fuser if the device is not partitionable), and thus compute the + in-use status. See the discussion in GetDiskList about the meaning + of 'in use'. + + @rtype: boolean + @return: the in-use status of the device - Returns: - boolean, the in-use status of the device """ + use_blockdev = IsPartitioned(name) + if use_blockdev: + cmd = "blockdev --rereadpt /dev/%s" % name + else: + cmd = "fuser -vam /dev/%s" % name - for retries in range(3): - result = ExecCommand("blockdev --rereadpt /dev/%s" % name) - if not result.failed: + for _ in range(3): + result = ExecCommand(cmd) + if not use_blockdev and result.failed: + break + elif use_blockdev and not result.failed: break time.sleep(2) - return not result.failed + if use_blockdev: + return not result.failed + else: + return result.failed + + +def CheckMounted(name): + """Check to see if a block device is a mountpoint. + + In recent distros/kernels, this is reported directly via fuser, but + on older ones not, so we do an additional check here (manually). + + """ + minfo = GetMountInfo() + dev = ReadDev(SysfsName(name)) + return dev not in minfo + + +def CheckSwap(name): + """Check to see if a block device is being used as swap. + + """ + name = "/dev/%s" % name + return name not in GetSwapInfo() + + +def InUse(name): + """Returns if a disk is in use or not. + + """ + return not (CheckSysfsHolders(name) and CheckReread(name) and + CheckMounted(name) and CheckSwap(name)) def WipeDisk(name): @@ -558,11 +696,11 @@ def WipeDisk(name): partition table. If not successful, it writes back the old partition data, and leaves the cleanup to the user. - Args: - the device name (e.g. sda) + @param name: the device name (e.g. sda) + """ - if not CheckReread(name): + if InUse(name): raise OperationalError("CRITICAL: disk %s you selected seems to be in" " use. ABORTING!" % name) @@ -582,7 +720,8 @@ def WipeDisk(name): " %d. I don't know how to cleanup. Sorry." % (name, bytes_written)) - if not CheckReread(name): + if InUse(name): + # try to restore the data fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC) os.write(fd, olddata) os.close(fd) @@ -594,24 +733,58 @@ def WipeDisk(name): name) -def PartitionDisk(name): +def PartitionDisk(name, use_sfdisk): """Partitions a disk. This function creates a single partition spanning the entire disk, by means of fdisk. - Args: - the device name, e.g. sda + @param name: the device name, e.g. sda + """ - result = ExecCommand( - 'echo ,,8e, | sfdisk /dev/%s' % name) + + # Check that parted exists + result = ExecCommand("parted --help") if result.failed: - raise OperationalError("CRITICAL: disk %s which I have just partitioned" - " cannot reread its partition table, or there" - " is some other sfdisk error. Likely, it is in" - " use. You have to clean this yourself. Error" - " message from sfdisk: %s" % - (name, result.output)) + use_sfdisk = True + print >> sys.stderr, ("Unable to execute \"parted --help\"," + " falling back to sfdisk.") + + # Check disk size - over 2TB means we need to use GPT + size = ReadSize("/sys/block/%s" % name) + if size > MBR_MAX_SIZE: + label_type = "gpt" + if use_sfdisk: + raise OperationalError("Critical: Disk larger than 2TB detected, but" + " parted is either not installed or --use-sfdisk" + " has been specified") + else: + label_type = "msdos" + + if use_sfdisk: + result = ExecCommand( + "echo ,,8e, | sfdisk /dev/%s" % name) + if result.failed: + raise OperationalError("CRITICAL: disk %s which I have just partitioned" + " cannot reread its partition table, or there" + " is some other sfdisk error. Likely, it is in" + " use. You have to clean this yourself. Error" + " message from sfdisk: %s" % + (name, result.output)) + + else: + result = ExecCommand("parted -s /dev/%s mklabel %s" % (name, label_type)) + if result.failed: + raise OperationalError("Critical: failed to create %s label on %s" % + (label_type, name)) + result = ExecCommand("parted -s /dev/%s mkpart pri ext2 1 100%%" % name) + if result.failed: + raise OperationalError("Critical: failed to create partition on %s" % + name) + result = ExecCommand("parted -s /dev/%s set 1 lvm on" % name) + if result.failed: + raise OperationalError("Critical: failed to set partition on %s to LVM" % + name) def CreatePVOnDisk(name): @@ -620,16 +793,16 @@ def CreatePVOnDisk(name): This function creates a physical volume on a block device, overriding all warnings. So it can wipe existing PVs and PVs which are in a VG. - Args: - the device name, e.g. sda + @param name: the device name, e.g. sda """ - result = ExecCommand("pvcreate -yff /dev/%s1 " % name) + device = DeviceName(name) + result = ExecCommand("pvcreate -yff %s" % device) if result.failed: raise OperationalError("I cannot create a physical volume on" - " partition /dev/%s1. Error message: %s." + " %s. Error message: %s." " Please clean up yourself." % - (name, result.output)) + (device, result.output)) def CreateVG(vgname, disks): @@ -638,11 +811,10 @@ def CreateVG(vgname, disks): This function creates a volume group named `vgname` on the disks given as parameters. The physical extent size is set to 64MB. - Args: - disks: a list of disk names, e.g. ['sda','sdb'] + @param disks: a list of disk names, e.g. ['sda','sdb'] """ - pnames = ["'/dev/%s1'" % disk for disk in disks] + pnames = [DeviceName(d) for d in disks] result = ExecCommand("vgcreate -s 64MB '%s' %s" % (vgname, " ".join(pnames))) if result.failed: raise OperationalError("I cannot create the volume group %s from" @@ -659,22 +831,25 @@ def ValidateDiskList(options): using the --disks option) such that all given disks are present and not in use. - Args: - the options returned from OptParser.parse_options + @param options: the options returned from OptParser.parse_options - Returns: - a list of disk names, e.g. ['sda', 'sdb'] - """ + @return: a list of disk names, e.g. ['sda', 'sdb'] + """ sysdisks = GetDiskList(options) if not sysdisks: raise PrereqError("no disks found (I looked for" " non-removable block devices).") sysd_free = [] sysd_used = [] - for name, size, dev, part, used in sysdisks: + for name, _, _, parts, used in sysdisks: if used: sysd_used.append(name) + for partname, _, _, partused in parts: + if partused: + sysd_used.append(partname) + else: + sysd_free.append(partname) else: sysd_free.append(name) @@ -697,8 +872,9 @@ def ValidateDiskList(options): def BootStrap(): - """Actual main routine.""" + """Actual main routine. + """ CheckPrereq() options, args = ParseOptions() @@ -716,17 +892,17 @@ def BootStrap(): " LV count: %s, size: %s, free: %s." % (vgname, lv_count, vg_size, vg_free)) - disklist = ValidateDiskList(options) for disk in disklist: WipeDisk(disk) - PartitionDisk(disk) + if IsPartitioned(disk): + PartitionDisk(disk, options.use_sfdisk) for disk in disklist: CreatePVOnDisk(disk) CreateVG(vgname, disklist) - status, lv_count, size, free = CheckVGExists(vgname) + status, lv_count, size, _ = CheckVGExists(vgname) if status: print "Done! %s: size %s GiB, disks: %s" % (vgname, size, ",".join(disklist)) @@ -736,11 +912,11 @@ def BootStrap(): def main(): - """application entry point. + """Application entry point. This is just a wrapper over BootStrap, to handle our own exceptions. - """ + """ try: BootStrap() except PrereqError, err: @@ -771,8 +947,8 @@ def main(): print >> sys.stderr, str(err) sys.exit(1) except ProgrammingError, err: - print >> sys.stderr, ("Internal application error. Please signal this" - " to xencluster-team.") + print >> sys.stderr, ("Internal application error. Please report this" + " to the Ganeti developer list.") print >> sys.stderr, "Error description: %s" % str(err) sys.exit(1) except Error, err: