4 # Copyright (C) 2006, 2007, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Program which configures LVM on the Ganeti nodes.
24 This program wipes disks and creates a volume group on top of them. It
25 can also show disk information to help you decide which disks you want
28 The error handling is done by raising our own exceptions from most of
29 the functions; these exceptions are then handled globally in the main()
30 function. The exceptions that each function can raise are not
31 documented individually, since almost every error path ends in a
34 Another two exceptions that are handled globally are IOError and
35 OSError. The idea behind this is, since we run as root, we should
36 usually not get these errors, but if we do it's most probably a system
37 error, so they should be handled and the user instructed to report
50 from ganeti.utils import RunCmd, ReadFile
51 from ganeti import constants
52 from ganeti import cli
53 from ganeti import compat
55 USAGE = ("\tlvmstrap diskinfo\n"
56 "\tlvmstrap [--vgname=NAME] [--allow-removable]"
57 " { --alldisks | --disks DISKLIST }"
62 #: Supported disk types (as prefixes)
70 #: Excluded filesystem types
71 EXCLUDED_FS = frozenset([
82 #: A regular expression that matches partitions (must be kept in sync
83 # with L{SUPPORTED_TYPES})
84 PART_RE = re.compile("^((?:h|s|m|ub)d[a-z]{1,2})[0-9]+$")
86 #: Minimum partition size to be considered (1 GB)
87 PART_MINSIZE = 1024 * 1024 * 1024
class Error(Exception):
    """Generic exception.

    Common base class of the error types defined in this module.

    """
class ProgrammingError(Error):
    """Exception denoting invalid assumptions in programming.

    This should catch sysfs tree changes, or otherwise incorrect
    assumptions about the contents of the /sys/block/... directories.

    """
class SysconfigError(Error):
    """Exception denoting invalid system configuration.

    If the system configuration is somehow wrong (e.g. /dev files
    missing, or having mismatched major/minor numbers relative to
    /sys/block devices), this exception will be raised.

    This should usually mean that the installation of the Xen node
    failed in some steps.

    """
class PrereqError(Error):
    """Exception denoting invalid prerequisites.

    If the node does not meet the requirements for cluster membership, this
    exception will be raised. Things like wrong kernel version, or no
    free disks, etc. belong here.

    This should usually mean that the build steps for the Xen node were
    not followed correctly.

    """
class OperationalError(Error):
    """Exception denoting actual errors.

    Errors during the bootstrapping are signaled using this exception.

    """
class ParameterError(Error):
    """Exception denoting invalid input from user.

    Wrong disks given as parameters will be signaled using this
    exception.

    """
153 """Shows program usage information and exits the program.
156 print >> sys.stderr, "Usage:"
157 print >> sys.stderr, USAGE
162 """Parses the command line options.
164 In case of command line errors, it will show the usage and exit the
168 @return: a tuple of (options, args), as returned by
169 OptionParser.parse_args
172 global verbose_flag # pylint: disable-msg=W0603
174 parser = optparse.OptionParser(usage="\n%s" % USAGE,
175 version="%%prog (ganeti) %s" %
176 constants.RELEASE_VERSION)
178 parser.add_option("--alldisks", dest="alldisks",
179 help="erase ALL disks", action="store_true",
181 parser.add_option("-d", "--disks", dest="disks",
182 help="Choose disks (e.g. hda,hdg)",
184 parser.add_option(cli.VERBOSE_OPT)
185 parser.add_option("-r", "--allow-removable",
186 action="store_true", dest="removable_ok", default=False,
187 help="allow and use removable devices too")
188 parser.add_option("-g", "--vg-name", type="string",
189 dest="vgname", default="xenvg", metavar="NAME",
190 help="the volume group to be created [default: xenvg]")
193 options, args = parser.parse_args()
197 verbose_flag = options.verbose
def IsPartitioned(disk):
    """Returns whether a given disk should be used partitioned or as-is.

    Devices whose name starts with C{md}, and names matched by
    L{PART_RE} (i.e. names that already denote a partition), are used
    as-is; any other disk is reported as one to be partitioned.

    @type disk: string
    @param disk: the device name, e.g. C{sda}, C{sda1} or C{md0}
    @rtype: boolean
    @return: False if the device is to be used as-is, True otherwise

    """
    # The prefix test comes first so md devices never reach the regex.
    return not (disk.startswith("md") or PART_RE.match(disk))
def DeviceName(disk):
    """Returns the appropriate device name for a disk.

    For non-partitioned devices, it returns the name as is, otherwise it
    returns the first partition.

    @type disk: string
    @param disk: the device name without the C{/dev/} prefix, e.g. C{sda}
    @rtype: string
    @return: the path under C{/dev}, e.g. C{/dev/sda1} or C{/dev/md0}

    """
    if IsPartitioned(disk):
        # Partitioned disks are addressed through their first partition.
        return "/dev/%s1" % disk
    return "/dev/%s" % disk
226 """Returns the sysfs name for a disk or partition.
229 match = PART_RE.match(disk)
231 # this is a partition, which resides in /sys/block under a different name
232 disk = "%s/%s" % (match.group(1), disk)
233 return "/sys/block/%s" % disk
def ExecCommand(command):
    """Executes a command.

    This is just a wrapper around L{RunCmd}, with the difference that
    if the command line argument -v has been given, it will print the
    command line and the command output on stdout.

    @param command: the command line to be executed
    @return: the object returned by L{RunCmd}; callers in this module
        read its C{failed}, C{stdout}, C{stderr} and C{output}
        attributes

    """
    # NOTE(review): the echo statements were not visible in the damaged
    # source and are reconstructed from the docstring above; verbose_flag
    # is the module-level flag set by the option parser.
    if verbose_flag:
        sys.stdout.write("%s\n" % command)
    result = RunCmd(command)
    if verbose_flag:
        sys.stdout.write("%s\n" % result.output)
    return result
258 """Check the prerequisites of this program.
260 It check that it runs on Linux 2.6, and that /sys is mounted and the
261 fact that /sys/block is a directory.
265 raise PrereqError("This tool runs as root only. Really.")
267 osname, _, release, _, _ = os.uname()
268 if osname != 'Linux':
269 raise PrereqError("This tool only runs on Linux"
270 " (detected OS: %s)." % osname)
272 if not release.startswith("2.6."):
273 raise PrereqError("Wrong major kernel version (detected %s, needs"
276 if not os.path.ismount("/sys"):
277 raise PrereqError("Can't find a filesystem mounted at /sys."
278 " Please mount /sys.")
280 if not os.path.isdir("/sys/block"):
281 raise SysconfigError("Can't find /sys/block directory. Has the"
282 " layout of /sys changed?")
284 if not os.path.ismount("/proc"):
285 raise PrereqError("Can't find a filesystem mounted at /proc."
286 " Please mount /proc.")
288 if not os.path.exists("/proc/mounts"):
289 raise SysconfigError("Can't find /proc/mounts")
def CheckVGExists(vgname):
    """Checks to see if a volume group exists.

    @param vgname: the volume group name

    @return: a four-tuple (exists, lv_count, vg_size, vg_free), where:
        - exists: True if the volume exists, otherwise False; if False,
          all other members of the tuple are None
        - lv_count: The number of logical volumes in the volume group
        - vg_size: The total size of the volume group (in gibibytes)
        - vg_free: The available space in the volume group
      The last three members are the raw strings printed by C{vgs};
      they are not converted to numbers.
    @raise PrereqError: if the C{vgs} output does not consist of exactly
        three whitespace-separated fields

    """
    result = ExecCommand("vgs --nohead -o lv_count,vg_size,vg_free"
                         " --nosuffix --units g"
                         " --ignorelockingfailure %s" % vgname)
    if result.failed:
        # A failing vgs is taken to mean that the volume group is absent.
        return False, None, None, None

    try:
        lv_count, vg_size, vg_free = result.stdout.strip().split()
    except ValueError:
        # This means the output of vgs can't be parsed
        raise PrereqError("cannot parse output of vgs (%s)" % result.stdout)

    return True, lv_count, vg_size, vg_free
320 def CheckSysDev(name, devnum):
321 """Checks consistency between /sys and /dev trees.
323 In /sys/block/<name>/dev and /sys/block/<name>/<part>/dev are the
324 kernel-known device numbers. The /dev/<name> block/char devices are
325 created by userspace and thus could differ from the kernel
326 view. This function checks the consistency between the device number
327 read from /sys and the actual device number in /dev.
329 Note that since the system could be using udev which removes and
330 recreates the device nodes on partition table rescan, we need to do
331 some retries here. Since we only do a stat, we can afford to do many
334 @param name: the device name, e.g. 'sda'
335 @param devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3
336 @raises L{SysconfigError}: in case of failure of the check
339 path = "/dev/%s" % name
341 if os.path.exists(path):
345 raise SysconfigError("the device file %s does not exist, but the block"
346 " device exists in the /sys/block tree" % path)
347 rdev = os.stat(path).st_rdev
349 raise SysconfigError("For device %s, the major:minor in /dev is %04x"
350 " while the major:minor in sysfs is %s" %
351 (path, rdev, devnum))
def ReadDev(syspath):
    """Reads the device number from a sysfs path.

    The device number is given in sysfs under a block device directory
    in a file named 'dev' which contains major:minor (in ASCII). This
    function reads that file and converts the major:minor pair to a dev
    number.

    @type syspath: string
    @param syspath: the path to a block device dir in sysfs,
        e.g. C{/sys/block/sda}
    @rtype: int
    @return: the device number
    @raise ProgrammingError: if C{syspath} has no 'dev' file

    """
    devfile = "%s/dev" % syspath
    if not os.path.exists(devfile):
        raise ProgrammingError("Invalid path passed to ReadDev: %s" % syspath)

    f = open(devfile)
    try:
        data = f.read().strip()
    finally:
        # Release the descriptor even if the read fails.
        f.close()

    major, minor = data.split(":", 1)
    # os.makedev needs integers; the file holds ASCII digits.
    return os.makedev(int(major), int(minor))
def ReadSize(syspath):
    """Reads the size from a sysfs path.

    The size is given in sysfs under a block device directory in a file
    named 'size' which contains the number of sectors (in ASCII). This
    function reads that file and converts the number in sectors to the
    size in bytes, using 512 bytes per sector.

    @type syspath: string
    @param syspath: the path to a block device dir in sysfs,
        e.g. C{/sys/block/sda}
    @rtype: int
    @return: the device size in bytes
    @raise ProgrammingError: if C{syspath} has no 'size' file

    """
    sizefile = "%s/size" % syspath
    if not os.path.exists(sizefile):
        raise ProgrammingError("Invalid path passed to ReadSize: %s" % syspath)

    f = open(sizefile)
    try:
        data = f.read().strip()
    finally:
        # Release the descriptor even if the read fails.
        f.close()

    # Plain integer arithmetic: Python widens the result as needed, so no
    # explicit long literal is required.
    return 512 * int(data)
408 """Reads physical volume information.
410 This function tries to see if a block device is a physical volume.
413 @param name: the device name (e.g. sda)
415 @return: the name of the volume group to which this PV belongs, or
416 "" if this PV is not in use, or None if this is not a PV
419 result = ExecCommand("pvdisplay -c /dev/%s" % name)
422 vgname = result.stdout.strip().split(":")[1]
426 def GetDiskList(opts):
427 """Computes the block device list for this system.
429 This function examines the /sys/block tree and using information
430 therein, computes the status of the block device.
432 @return: a list like [(name, size, dev, partitions, inuse), ...], where:
433 - name is the block device name (e.g. sda)
434 - size the size in bytes
435 - dev is the device number (e.g. 8704 for hdg)
436 - partitions is [(name, size, dev), ...] mirroring the disk list
437 data inuse is a boolean showing the in-use status of the disk,
438 computed as the possibility of re-reading the partition table
439 (the meaning of the operation varies with the kernel version,
440 but is usually accurate; a mounted disk/partition or swap-area
441 or PV with active LVs on it is busy)
445 for name in os.listdir("/sys/block"):
446 if not compat.any([name.startswith(pfx) for pfx in SUPPORTED_TYPES]):
449 disksysfsname = "/sys/block/%s" % name
450 size = ReadSize(disksysfsname)
452 f = open("/sys/block/%s/removable" % name)
453 removable = int(f.read().strip())
456 if removable and not opts.removable_ok:
459 dev = ReadDev(disksysfsname)
460 CheckSysDev(name, dev)
462 # Enumerate partitions of the block device
464 for partname in os.listdir(disksysfsname):
465 if not partname.startswith(name):
467 partsysfsname = "%s/%s" % (disksysfsname, partname)
468 partdev = ReadDev(partsysfsname)
469 partsize = ReadSize(partsysfsname)
470 if partsize >= PART_MINSIZE:
471 CheckSysDev(partname, partdev)
472 partinuse = InUse(partname)
473 partitions.append((partname, partsize, partdev, partinuse))
475 dlist.append((name, size, dev, partitions, inuse))
481 """Reads /proc/mounts and computes the mountpoint-devnum mapping.
483 This function reads /proc/mounts, finds the mounted filesystems
484 (excepting a hard-coded blacklist of network and virtual
485 filesystems) and does a stat on these mountpoints. The st_dev number
486 of the results is memorised for later matching against the
490 @return: a {mountpoint: device number} dictionary
493 mountlines = ReadFile("/proc/mounts").splitlines()
495 for line in mountlines:
496 _, mountpoint, fstype, _ = line.split(None, 3)
498 if fstype in EXCLUDED_FS:
501 dev = os.stat(mountpoint).st_dev
503 # this should be a fairly rare error, since we are blacklisting
504 # network filesystems; with this in mind, we'll ignore it,
505 # since the rereadpt check catches in-use filesystems,
506 # and this is used for disk information only
507 print >> sys.stderr, ("Can't stat mountpoint '%s': %s" %
509 print >> sys.stderr, "Ignoring."
511 mounts[dev] = mountpoint
516 """Reads /proc/swaps and returns the list of swap backing stores.
519 swaplines = ReadFile("/proc/swaps").splitlines()[1:]
520 return [line.split(None, 1)[0] for line in swaplines]
def DevInfo(name, dev, mountinfo):
    """Computes miscellaneous information about a block device.

    @type name: string
    @param name: the device name, e.g. sda
    @param dev: the device number, used as the lookup key in C{mountinfo}
    @param mountinfo: a mapping from device number to mount path

    @return: a tuple (mpath, whatvg, fileinfo), where:
        - mpath is the mount path where this device is mounted or None
        - whatvg is the result of the ReadPV function
        - fileinfo is the output of file -bs on the device (at most its
          first 45 characters), or C{<error: ...>} carrying the stderr
          of the command if it failed

    """
    mpath = mountinfo.get(dev)
    whatvg = ReadPV(name)

    result = ExecCommand("file -bs /dev/%s" % name)
    if result.failed:
        fileinfo = "<error: %s>" % result.stderr
    else:
        # Only use stdout on success; assigning it unconditionally would
        # discard the error text set above.
        fileinfo = result.stdout[:45]
    return mpath, whatvg, fileinfo
549 def ShowDiskInfo(opts):
550 """Shows a nicely formatted block device list for this system.
552 This function shows the user a table with the information gathered
553 by the other functions defined, in order to help the user make a
554 choice about which disks should be allocated to our volume group.
563 mounts = GetMountInfo()
564 dlist = GetDiskList(opts)
566 print "------- Disk information -------"
575 fields = ["name", "size", "used", "mount", "lvm", "info"]
578 # Flatten the [(disk, [partition,...]), ...] list
579 for name, size, dev, parts, inuse in dlist:
580 flatlist.append((name, size, dev, _inuse(inuse)))
581 for partname, partsize, partdev, partinuse in parts:
582 flatlist.append((partname, partsize, partdev, _inuse(partinuse)))
585 for name, size, dev, in_use in flatlist:
586 mp, vgname, fileinfo = DevInfo(name, dev, mounts)
594 lvminfo = "in %s" % vgname
600 strlist.append([name, "%.2f" % (float(size) / 1024 / 1024),
601 in_use, mp, lvminfo, fileinfo])
603 data = cli.GenerateTable(headers, fields, None,
604 strlist, numfields=["size"])
def CheckSysfsHolders(name):
    """Check to see if a device is 'held' at sysfs level.

    This is usually the case for Physical Volumes under LVM.

    @type name: string
    @param name: the device name, e.g. sda
    @rtype: boolean
    @return: true if the device is available according to sysfs, i.e.
        its C{holders} directory is missing or empty

    """
    holders_dir = "%s/holders/" % SysfsName(name)
    try:
        contents = os.listdir(holders_dir)
    except OSError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the statement parses on both old and new
        # interpreters.
        if sys.exc_info()[1].errno != errno.ENOENT:
            raise
        # A missing holders directory is treated as "no holders".
        contents = []
    return not contents
629 def CheckReread(name):
630 """Check to see if a block device is in use.
632 Uses blockdev to reread the partition table of a block device (or
633 fuser if the device is not partitionable), and thus compute the
634 in-use status. See the discussion in GetDiskList about the meaning
638 @return: the in-use status of the device
641 use_blockdev = IsPartitioned(name)
643 cmd = "blockdev --rereadpt /dev/%s" % name
645 cmd = "fuser -vam /dev/%s" % name
648 result = ExecCommand(cmd)
649 if not use_blockdev and result.failed:
651 elif not result.failed:
656 return not result.failed
def CheckMounted(name):
    """Check to see if a block device is a mountpoint.

    In recent distros/kernels, this is reported directly via fuser, but
    on older ones not, so we do an additional check here (manually).

    @type name: string
    @param name: the device name, e.g. sda
    @rtype: boolean
    @return: True if the device number is NOT among the mounted
        devices (i.e. the device is free from this check's point of
        view), False if it is mounted

    """
    mounted_devs = GetMountInfo()
    devnum = ReadDev(SysfsName(name))
    return devnum not in mounted_devs
674 """Check to see if a block device is being used as swap.
677 name = "/dev/%s" % name
678 return name not in GetSwapInfo()
682 """Returns if a disk is in use or not.
685 return not (CheckSysfsHolders(name) and CheckReread(name) and
686 CheckMounted(name) and CheckSwap(name))
690 """Wipes a block device.
692 This function wipes a block device, by clearing and re-reading the
693 partition table. If not successful, it writes back the old partition
694 data, and leaves the cleanup to the user.
696 @param name: the device name (e.g. sda)
701 raise OperationalError("CRITICAL: disk %s you selected seems to be in"
702 " use. ABORTING!" % name)
704 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
705 olddata = os.read(fd, 512)
706 if len(olddata) != 512:
707 raise OperationalError("CRITICAL: Can't read partition table information"
708 " from /dev/%s (needed 512 bytes, got %d" %
709 (name, len(olddata)))
712 bytes_written = os.write(fd, newdata)
714 if bytes_written != 512:
715 raise OperationalError("CRITICAL: Can't write partition table information"
716 " to /dev/%s (tried to write 512 bytes, written"
717 " %d. I don't know how to cleanup. Sorry." %
718 (name, bytes_written))
721 # try to restore the data
722 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
723 os.write(fd, olddata)
725 raise OperationalError("CRITICAL: disk %s which I have just wiped cannot"
726 " reread partition table. Most likely, it is"
727 " in use. You have to clean after this yourself."
728 " I tried to restore the old partition table,"
729 " but I cannot guarantee nothing has broken." %
def PartitionDisk(name):
    """Partitions a disk.

    This function creates a single partition spanning the entire disk,
    of type 8e (Linux LVM), by feeding C{,,8e,} to sfdisk.

    @type name: string
    @param name: the device name, e.g. sda
    @raise OperationalError: if sfdisk reports a failure

    """
    result = ExecCommand('echo ,,8e, | sfdisk /dev/%s' % name)
    if result.failed:
        raise OperationalError("CRITICAL: disk %s which I have just partitioned"
                               " cannot reread its partition table, or there"
                               " is some other sfdisk error. Likely, it is in"
                               " use. You have to clean this yourself. Error"
                               " message from sfdisk: %s" %
                               (name, result.output))
def CreatePVOnDisk(name):
    """Creates a physical volume on a block device.

    This function creates a physical volume on a block device, overriding
    all warnings. So it can wipe existing PVs and PVs which are in a VG.

    @type name: string
    @param name: the device name, e.g. sda
    @raise OperationalError: if pvcreate reports a failure

    """
    device = DeviceName(name)
    result = ExecCommand("pvcreate -yff %s" % device)
    if result.failed:
        raise OperationalError("I cannot create a physical volume on"
                               " %s. Error message: %s."
                               " Please clean up yourself." %
                               (device, result.output))
def CreateVG(vgname, disks):
    """Creates the volume group.

    This function creates a volume group named `vgname` on the disks
    given as parameters. The physical extent size is set to 64MB.

    @type vgname: string
    @param vgname: the name of the volume group to create
    @param disks: a list of disk names, e.g. ['sda','sdb']
    @raise OperationalError: if vgcreate reports a failure

    """
    pnames = [DeviceName(d) for d in disks]
    result = ExecCommand("vgcreate -s 64MB '%s' %s" %
                         (vgname, " ".join(pnames)))
    if result.failed:
        # NOTE(review): the tail of this message (" yourself.") was not
        # visible in the damaged source; it is restored to match the
        # wording used by CreatePVOnDisk.
        raise OperationalError("I cannot create the volume group %s from"
                               " disks %s. Error message: %s. Please clean up"
                               " yourself." %
                               (vgname, " ".join(disks), result.output))
789 def ValidateDiskList(options):
790 """Validates or computes the disk list for create.
792 This function either computes the available disk list (if the user
793 gave --alldisks option), or validates the user-given disk list (by
794 using the --disks option) such that all given disks are present and
797 @param options: the options returned from OptParser.parse_options
799 @return: a list of disk names, e.g. ['sda', 'sdb']
802 sysdisks = GetDiskList(options)
804 raise PrereqError("no disks found (I looked for"
805 " non-removable block devices).")
808 for name, _, _, parts, used in sysdisks:
810 sysd_used.append(name)
811 for partname, _, _, partused in parts:
813 sysd_used.append(partname)
815 sysd_free.append(partname)
817 sysd_free.append(name)
820 raise PrereqError("no free disks found! (%d in-use disks)" %
825 disklist = options.disks.split(",")
826 for name in disklist:
827 if name in sysd_used:
828 raise ParameterError("disk %s is in use, cannot wipe!" % name)
829 if name not in sysd_free:
830 raise ParameterError("cannot find disk %s!" % name)
832 raise ParameterError("Please use either --alldisks or --disks!")
838 """Actual main routine.
843 options, args = ParseOptions()
844 vgname = options.vgname
845 command = args.pop(0)
846 if command == "diskinfo":
847 ShowDiskInfo(options)
849 if command != "create":
852 exists, lv_count, vg_size, vg_free = CheckVGExists(vgname)
854 raise PrereqError("It seems volume group '%s' already exists:\n"
855 " LV count: %s, size: %s, free: %s." %
856 (vgname, lv_count, vg_size, vg_free))
859 disklist = ValidateDiskList(options)
861 for disk in disklist:
863 if IsPartitioned(disk):
865 for disk in disklist:
867 CreateVG(vgname, disklist)
869 status, lv_count, size, _ = CheckVGExists(vgname)
871 print "Done! %s: size %s GiB, disks: %s" % (vgname, size,
874 raise OperationalError("Although everything seemed ok, the volume"
875 " group did not get created.")
879 """Application entry point.
881 This is just a wrapper over BootStrap, to handle our own exceptions.
886 except PrereqError, err:
887 print >> sys.stderr, "The prerequisites for running this tool are not met."
888 print >> sys.stderr, ("Please make sure you followed all the steps in"
889 " the build document.")
890 print >> sys.stderr, "Description: %s" % str(err)
892 except SysconfigError, err:
893 print >> sys.stderr, ("This system's configuration seems wrong, at"
894 " least is not what I expect.")
895 print >> sys.stderr, ("Please check that the installation didn't fail"
897 print >> sys.stderr, "Description: %s" % str(err)
899 except ParameterError, err:
900 print >> sys.stderr, ("Some parameters you gave to the program or the"
901 " invocation is wrong. ")
902 print >> sys.stderr, "Description: %s" % str(err)
904 except OperationalError, err:
905 print >> sys.stderr, ("A serious error has happened while modifying"
906 " the system's configuration.")
907 print >> sys.stderr, ("Please review the error message below and make"
908 " sure you clean up yourself.")
909 print >> sys.stderr, ("It is most likely that the system configuration"
910 " has been partially altered.")
911 print >> sys.stderr, str(err)
913 except ProgrammingError, err:
914 print >> sys.stderr, ("Internal application error. Please report this"
915 " to the Ganeti developer list.")
916 print >> sys.stderr, "Error description: %s" % str(err)
919 print >> sys.stderr, "Unhandled application error: %s" % err
921 except (IOError, OSError), err:
922 print >> sys.stderr, "I/O error detected, please report."
923 print >> sys.stderr, "Description: %s" % str(err)
927 if __name__ == "__main__":