4 # Copyright (C) 2006, 2007, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
"""Program which configures LVM on the Ganeti nodes.

This program wipes disks and creates a volume group on top of them. It
can also show disk information to help you decide which disks you want
to wipe.

The error handling is done by raising our own exceptions from most of
the functions; these exceptions are then handled globally in the main()
function. The exceptions that each function can raise are not
documented individually, since almost every error path ends in a
clean exit of the program.

Another two exceptions that are handled globally are IOError and
OSError. The idea behind this is, since we run as root, we should
usually not get these errors, but if we do it's most probably a system
error, so they should be handled and the user instructed to report
them.

"""
49 from ganeti.utils import RunCmd, ReadFile
50 from ganeti import constants
51 from ganeti import cli
52 from ganeti import compat
54 USAGE = ("\tlvmstrap diskinfo\n"
55 "\tlvmstrap [--vgname=NAME] [--allow-removable]"
56 " { --alldisks | --disks DISKLIST }"
61 #: Supported disk types (as prefixes)
class Error(Exception):
    """Generic exception, base class of this program's own exceptions."""


class ProgrammingError(Error):
    """Exception denoting invalid assumptions in programming.

    This should catch sysfs tree changes, or otherwise incorrect
    assumptions about the contents of the /sys/block/... directories.

    """


class SysconfigError(Error):
    """Exception denoting invalid system configuration.

    If the system configuration is somehow wrong (e.g. /dev files
    missing, or having mismatched major/minor numbers relative to
    /sys/block devices), this exception will be raised.

    This should usually mean that the installation of the Xen node
    failed in some steps.

    """


class PrereqError(Error):
    """Exception denoting invalid prerequisites.

    If the node does not meet the requirements for cluster membership, this
    exception will be raised. Things like wrong kernel version, or no
    free disks, etc. belong here.

    This should usually mean that the build steps for the Xen node were
    not followed correctly.

    """


class OperationalError(Error):
    """Exception denoting actual errors.

    Errors during the bootstrapping are signaled using this exception.

    """


class ParameterError(Error):
    """Exception denoting invalid input from user.

    Wrong disks given as parameters will be signaled using this
    exception.

    """
133 """Shows program usage information and exits the program.
136 print >> sys.stderr, "Usage:"
137 print >> sys.stderr, USAGE
142 """Parses the command line options.
144 In case of command line errors, it will show the usage and exit the
148 @return: a tuple of (options, args), as returned by
149 OptionParser.parse_args
152 global verbose_flag # pylint: disable-msg=W0603
154 parser = optparse.OptionParser(usage="\n%s" % USAGE,
155 version="%%prog (ganeti) %s" %
156 constants.RELEASE_VERSION)
158 parser.add_option("--alldisks", dest="alldisks",
159 help="erase ALL disks", action="store_true",
161 parser.add_option("-d", "--disks", dest="disks",
162 help="Choose disks (e.g. hda,hdg)",
164 parser.add_option(cli.VERBOSE_OPT)
165 parser.add_option("-r", "--allow-removable",
166 action="store_true", dest="removable_ok", default=False,
167 help="allow and use removable devices too")
168 parser.add_option("-g", "--vg-name", type="string",
169 dest="vgname", default="xenvg", metavar="NAME",
170 help="the volume group to be created [default: xenvg]")
173 options, args = parser.parse_args()
177 verbose_flag = options.verbose
def IsPartitioned(disk):
    """Returns whether a given disk should be used partitioned or as-is.

    Currently only md devices are used as is.

    @param disk: the block device name, e.g. 'sda' or 'md0'
    @return: False if the name starts with 'md' (device used as-is),
        True for every other name

    """
    return not disk.startswith("md")
def DeviceName(disk):
    """Returns the appropriate device name for a disk.

    For non-partitioned devices, it returns the name as is, otherwise it
    returns the first partition.

    @param disk: the block device name, e.g. 'sda' or 'md0'
    @return: the path under /dev to use for the disk, e.g. '/dev/sda1'
        for a partitioned disk or '/dev/md0' for one used as-is

    """
    if IsPartitioned(disk):
        # partitioned disks are addressed through their first partition
        device = "/dev/%s1" % disk
    else:
        device = "/dev/%s" % disk
    return device
def ExecCommand(command):
    """Executes a command.

    This is just a wrapper around RunCmd, with the difference that if
    the command line argument -v has been given (i.e. the module-level
    verbose_flag is set), it will print the command line and the command
    output on stdout.

    @param command: the command line to be executed
    @return: the result object returned by RunCmd; callers in this file
        read its C{failed}, C{stdout}, C{stderr} and C{output}
        attributes

    """
    if verbose_flag:
        sys.stdout.write("%s\n" % command)
    result = RunCmd(command)
    if verbose_flag:
        sys.stdout.write("%s\n" % result.output)
    return result
227 """Check the prerequisites of this program.
229 It check that it runs on Linux 2.6, and that /sys is mounted and the
230 fact that /sys/block is a directory.
234 raise PrereqError("This tool runs as root only. Really.")
236 osname, _, release, _, _ = os.uname()
237 if osname != 'Linux':
238 raise PrereqError("This tool only runs on Linux"
239 " (detected OS: %s)." % osname)
241 if not release.startswith("2.6."):
242 raise PrereqError("Wrong major kernel version (detected %s, needs"
245 if not os.path.ismount("/sys"):
246 raise PrereqError("Can't find a filesystem mounted at /sys."
247 " Please mount /sys.")
249 if not os.path.isdir("/sys/block"):
250 raise SysconfigError("Can't find /sys/block directory. Has the"
251 " layout of /sys changed?")
253 if not os.path.ismount("/proc"):
254 raise PrereqError("Can't find a filesystem mounted at /proc."
255 " Please mount /proc.")
257 if not os.path.exists("/proc/mounts"):
258 raise SysconfigError("Can't find /proc/mounts")
def CheckVGExists(vgname):
    """Checks to see if a volume group exists.

    @param vgname: the volume group name

    @return: a four-tuple (exists, lv_count, vg_size, vg_free), where:
        - exists: True if the volume exists, otherwise False; if False,
          all other members of the tuple are None
        - lv_count: The number of logical volumes in the volume group
        - vg_size: The total size of the volume group (in gibibytes)
        - vg_free: The available space in the volume group
      The last three members are the whitespace-separated fields printed
      by vgs, returned as strings without further conversion.
    @raises PrereqError: if vgs succeeded but did not print exactly
        three fields

    """
    result = ExecCommand("vgs --nohead -o lv_count,vg_size,vg_free"
                         " --nosuffix --units g"
                         " --ignorelockingfailure %s" % vgname)
    if result.failed:
        # vgs failing is taken to mean that the volume group is not there
        return False, None, None, None

    try:
        lv_count, vg_size, vg_free = result.stdout.strip().split()
    except ValueError:
        # Wrong number of fields: the output of vgs can't be parsed
        raise PrereqError("cannot parse output of vgs (%s)" % result.stdout)

    return True, lv_count, vg_size, vg_free
289 def CheckSysDev(name, devnum):
290 """Checks consistency between /sys and /dev trees.
292 In /sys/block/<name>/dev and /sys/block/<name>/<part>/dev are the
293 kernel-known device numbers. The /dev/<name> block/char devices are
294 created by userspace and thus could differ from the kernel
295 view. This function checks the consistency between the device number
296 read from /sys and the actual device number in /dev.
298 Note that since the system could be using udev which removes and
299 recreates the device nodes on partition table rescan, we need to do
300 some retries here. Since we only do a stat, we can afford to do many
303 @param name: the device name, e.g. 'sda'
304 @param devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3
305 @raises L{SysconfigError}: in case of failure of the check
308 path = "/dev/%s" % name
310 if os.path.exists(path):
314 raise SysconfigError("the device file %s does not exist, but the block"
315 " device exists in the /sys/block tree" % path)
316 rdev = os.stat(path).st_rdev
318 raise SysconfigError("For device %s, the major:minor in /dev is %04x"
319 " while the major:minor in sysfs is %s" %
320 (path, rdev, devnum))
def ReadDev(syspath):
    """Reads the device number from a sysfs path.

    The device number is given in sysfs under a block device directory
    in a file named 'dev' which contains major:minor (in ASCII). This
    function reads that file and converts the major:minor pair to a dev
    number.

    @type syspath: string
    @param syspath: the path to a block device dir in sysfs,
        e.g. C{/sys/block/sda}
    @return: the device number, as built by os.makedev
    @raises ProgrammingError: if there is no 'dev' file under syspath

    """
    devfile = "%s/dev" % syspath
    if not os.path.exists(devfile):
        raise ProgrammingError("Invalid path passed to ReadDev: %s" % syspath)

    f = open(devfile)
    try:
        data = f.read().strip()
    finally:
        # always release the handle, even if the read fails
        f.close()

    major, minor = data.split(":", 1)
    # os.makedev needs integers, not the ASCII text read from sysfs
    return os.makedev(int(major), int(minor))
def ReadSize(syspath):
    """Reads the size from a sysfs path.

    The size is given in sysfs under a block device directory in a file
    named 'size' which contains the number of sectors (in ASCII). This
    function reads that file and converts the number in sectors to the
    size in bytes, by multiplying it by 512.

    @type syspath: string
    @param syspath: the path to a block device dir in sysfs,
        e.g. C{/sys/block/sda}
    @return: the device size in bytes
    @raises ProgrammingError: if there is no 'size' file under syspath

    """
    sizefile = "%s/size" % syspath
    if not os.path.exists(sizefile):
        raise ProgrammingError("Invalid path passed to ReadSize: %s" % syspath)

    f = open(sizefile)
    try:
        data = f.read().strip()
    finally:
        # always release the handle, even if the read fails
        f.close()

    # plain integers are promoted to arbitrary precision automatically,
    # so no explicit long literal is needed here
    return 512 * int(data)
377 """Reads physical volume information.
379 This function tries to see if a block device is a physical volume.
382 @param name: the device name (e.g. sda)
384 @return: the name of the volume group to which this PV belongs, or
385 "" if this PV is not in use, or None if this is not a PV
388 result = ExecCommand("pvdisplay -c /dev/%s" % name)
391 vgname = result.stdout.strip().split(":")[1]
395 def GetDiskList(opts):
396 """Computes the block device list for this system.
398 This function examines the /sys/block tree and using information
399 therein, computes the status of the block device.
401 @return: a list like [(name, size, dev, partitions, inuse), ...], where:
402 - name is the block device name (e.g. sda)
403 - size the size in bytes
404 - dev is the device number (e.g. 8704 for hdg)
405 - partitions is [(name, size, dev), ...] mirroring the disk list
406 data inuse is a boolean showing the in-use status of the disk,
407 computed as the possibility of re-reading the partition table
408 (the meaning of the operation varies with the kernel version,
409 but is usually accurate; a mounted disk/partition or swap-area
410 or PV with active LVs on it is busy)
414 for name in os.listdir("/sys/block"):
415 if not compat.any([name.startswith(pfx) for pfx in SUPPORTED_TYPES]):
418 size = ReadSize("/sys/block/%s" % name)
420 f = open("/sys/block/%s/removable" % name)
421 removable = int(f.read().strip())
424 if removable and not opts.removable_ok:
427 dev = ReadDev("/sys/block/%s" % name)
428 CheckSysDev(name, dev)
430 # Enumerate partitions of the block device
432 for partname in os.listdir("/sys/block/%s" % name):
433 if not partname.startswith(name):
435 partdev = ReadDev("/sys/block/%s/%s" % (name, partname))
436 partsize = ReadSize("/sys/block/%s/%s" % (name, partname))
437 CheckSysDev(partname, partdev)
438 partitions.append((partname, partsize, partdev))
440 dlist.append((name, size, dev, partitions, inuse))
446 """Reads /proc/mounts and computes the mountpoint-devnum mapping.
448 This function reads /proc/mounts, finds the mounted filesystems
449 (excepting a hard-coded blacklist of network and virtual
450 filesystems) and does a stat on these mountpoints. The st_dev number
451 of the results is memorised for later matching against the
455 @return: a {mountpoint: device number} dictionary
458 mountlines = ReadFile("/proc/mounts").splitlines()
460 for line in mountlines:
461 _, mountpoint, fstype, _ = line.split(None, 3)
463 if fstype in ["nfs", "nfs4", "autofs", "tmpfs", "proc", "sysfs"]:
466 dev = os.stat(mountpoint).st_dev
468 # this should be a fairly rare error, since we are blacklisting
469 # network filesystems; with this in mind, we'll ignore it,
470 # since the rereadpt check catches in-use filesystems,
471 # and this is used for disk information only
472 print >> sys.stderr, ("Can't stat mountpoint '%s': %s" %
474 print >> sys.stderr, "Ignoring."
476 mounts[dev] = mountpoint
def DevInfo(name, dev, mountinfo):
    """Computes miscellaneous information about a block device.

    @param name: the device name, e.g. sda
    @param dev: the device number, used as the lookup key in mountinfo
    @param mountinfo: a dictionary mapping device numbers to mountpoints

    @return: a tuple (mpath, whatvg, fileinfo), where:
        - mpath is the mount path where this device is mounted or None
        - whatvg is the result of the ReadPV function
        - fileinfo is the output of file -bs on the device (cut to 45
          characters), or an "<error: ...>" marker carrying the stderr
          of the command if it failed

    """
    # None when the device number is not among the known mountpoints
    mpath = mountinfo.get(dev)

    whatvg = ReadPV(name)

    result = ExecCommand("file -bs /dev/%s" % name)
    if result.failed:
        fileinfo = "<error: %s>" % result.stderr
    else:
        # only use stdout on success, so that the error marker above is
        # not overwritten
        fileinfo = result.stdout[:45]
    return mpath, whatvg, fileinfo
506 def ShowDiskInfo(opts):
507 """Shows a nicely formatted block device list for this system.
509 This function shows the user a table with the information gathered
510 by the other functions defined, in order to help the user make a
511 choice about which disks should be allocated to our volume group.
514 mounts = GetMountInfo()
515 dlist = GetDiskList(opts)
517 print "------- Disk information -------"
526 fields = ["name", "size", "used", "mount", "lvm", "info"]
529 # Flatten the [(disk, [partition,...]), ...] list
530 for name, size, dev, parts, inuse in dlist:
535 flatlist.append((name, size, dev, str_inuse))
536 for partname, partsize, partdev in parts:
537 flatlist.append((partname, partsize, partdev, ""))
540 for name, size, dev, in_use in flatlist:
541 mp, vgname, fileinfo = DevInfo(name, dev, mounts)
549 lvminfo = "in %s" % vgname
555 strlist.append([name, "%.2f" % (float(size) / 1024 / 1024),
556 in_use, mp, lvminfo, fileinfo])
558 data = cli.GenerateTable(headers, fields, None,
559 strlist, numfields=["size"])
def CheckSysfsHolders(name):
    """Check to see if a device is held at sysfs level.

    This is usually the case for Physical Volumes under LVM.

    @param name: the block device name, e.g. 'sda'
    @return: true if the device is available according to sysfs, i.e.
        its /sys/block/<name>/holders/ directory is empty or missing
    @raises OSError: if the holders directory cannot be listed for any
        reason other than not existing

    """
    try:
        contents = os.listdir("/sys/block/%s/holders/" % name)
    except OSError:
        # sys.exc_info() binds the exception without version-specific
        # "except ..., err" / "except ... as err" syntax
        err = sys.exc_info()[1]
        if err.errno != errno.ENOENT:
            raise
        # a missing holders directory is treated like an empty one
        contents = []
    return not contents
584 def CheckReread(name):
585 """Check to see if a block device is in use.
587 Uses blockdev to reread the partition table of a block device (or
588 fuser if the device is not partitionable), and thus compute the
589 in-use status. See the discussion in GetDiskList about the meaning
593 @return: the in-use status of the device
596 use_blockdev = IsPartitioned(name)
598 cmd = "blockdev --rereadpt /dev/%s" % name
600 cmd = "fuser -vam /dev/%s" % name
603 result = ExecCommand(cmd)
604 if not use_blockdev and result.failed:
606 elif not result.failed:
611 return not result.failed
617 """Returns if a disk is in use or not.
620 return not (CheckSysfsHolders(name) and CheckReread(name))
624 """Wipes a block device.
626 This function wipes a block device, by clearing and re-reading the
627 partition table. If not successful, it writes back the old partition
628 data, and leaves the cleanup to the user.
630 @param name: the device name (e.g. sda)
635 raise OperationalError("CRITICAL: disk %s you selected seems to be in"
636 " use. ABORTING!" % name)
638 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
639 olddata = os.read(fd, 512)
640 if len(olddata) != 512:
641 raise OperationalError("CRITICAL: Can't read partition table information"
642 " from /dev/%s (needed 512 bytes, got %d" %
643 (name, len(olddata)))
646 bytes_written = os.write(fd, newdata)
648 if bytes_written != 512:
649 raise OperationalError("CRITICAL: Can't write partition table information"
650 " to /dev/%s (tried to write 512 bytes, written"
651 " %d. I don't know how to cleanup. Sorry." %
652 (name, bytes_written))
655 # try to restore the data
656 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
657 os.write(fd, olddata)
659 raise OperationalError("CRITICAL: disk %s which I have just wiped cannot"
660 " reread partition table. Most likely, it is"
661 " in use. You have to clean after this yourself."
662 " I tried to restore the old partition table,"
663 " but I cannot guarantee nothing has broken." %
def PartitionDisk(name):
    """Partitions a disk.

    This function creates a single partition spanning the entire disk,
    by feeding the one-line layout ",,8e," (default start, default size,
    partition id 8e) to sfdisk.

    @param name: the device name, e.g. sda
    @raises OperationalError: if the sfdisk command fails

    """
    result = ExecCommand("echo ,,8e, | sfdisk /dev/%s" % name)
    if result.failed:
        raise OperationalError("CRITICAL: disk %s which I have just partitioned"
                               " cannot reread its partition table, or there"
                               " is some other sfdisk error. Likely, it is in"
                               " use. You have to clean this yourself. Error"
                               " message from sfdisk: %s" %
                               (name, result.output))
def CreatePVOnDisk(name):
    """Creates a physical volume on a block device.

    This function creates a physical volume on a block device, overriding
    all warnings. So it can wipe existing PVs and PVs which are in a VG.

    @param name: the device name, e.g. sda
    @raises OperationalError: if the pvcreate command fails

    """
    # the PV goes on whatever DeviceName selects for this disk
    device = DeviceName(name)
    result = ExecCommand("pvcreate -yff %s" % device)
    if result.failed:
        raise OperationalError("I cannot create a physical volume on"
                               " %s. Error message: %s."
                               " Please clean up yourself." %
                               (device, result.output))
def CreateVG(vgname, disks):
    """Creates the volume group.

    This function creates a volume group named `vgname` on the disks
    given as parameters. The physical extent size is set to 64MB.

    @param vgname: the name of the volume group to create
    @param disks: a list of disk names, e.g. ['sda','sdb']
    @raises OperationalError: if the vgcreate command fails

    """
    # vgcreate is given the device paths, not the bare disk names
    pnames = [DeviceName(d) for d in disks]
    result = ExecCommand("vgcreate -s 64MB '%s' %s" %
                         (vgname, " ".join(pnames)))
    if result.failed:
        raise OperationalError("I cannot create the volume group %s from"
                               " disks %s. Error message: %s. Please clean up"
                               " yourself." %
                               (vgname, " ".join(disks), result.output))
def ValidateDiskList(options):
    """Validates or computes the disk list for create.

    This function either computes the available disk list (if the user
    gave --alldisks option), or validates the user-given disk list (by
    using the --disks option) such that all given disks are present and
    not in use.

    @param options: the options returned from OptParser.parse_options
    @return: a list of disk names, e.g. ['sda', 'sdb']
    @raises PrereqError: if no disks at all, or no free disks, were found
    @raises ParameterError: if a requested disk is in use or unknown, or
        if neither --alldisks nor --disks was given

    """
    sysdisks = GetDiskList(options)
    if not sysdisks:
        raise PrereqError("no disks found (I looked for"
                          " non-removable block devices).")

    # split the detected disks by their in-use flag
    sysd_free = []
    sysd_used = []
    for name, _, _, _, used in sysdisks:
        if used:
            sysd_used.append(name)
        else:
            sysd_free.append(name)

    if not sysd_free:
        raise PrereqError("no free disks found! (%d in-use disks)" %
                          len(sysd_used))

    if options.alldisks:
        disklist = sysd_free
    elif options.disks:
        disklist = options.disks.split(",")
        for name in disklist:
            if name in sysd_used:
                raise ParameterError("disk %s is in use, cannot wipe!" % name)
            if name not in sysd_free:
                raise ParameterError("cannot find disk %s!" % name)
    else:
        raise ParameterError("Please use either --alldisks or --disks!")

    return disklist
767 """Actual main routine.
772 options, args = ParseOptions()
773 vgname = options.vgname
774 command = args.pop(0)
775 if command == "diskinfo":
776 ShowDiskInfo(options)
778 if command != "create":
781 exists, lv_count, vg_size, vg_free = CheckVGExists(vgname)
783 raise PrereqError("It seems volume group '%s' already exists:\n"
784 " LV count: %s, size: %s, free: %s." %
785 (vgname, lv_count, vg_size, vg_free))
788 disklist = ValidateDiskList(options)
790 for disk in disklist:
792 if IsPartitioned(disk):
794 for disk in disklist:
796 CreateVG(vgname, disklist)
798 status, lv_count, size, _ = CheckVGExists(vgname)
800 print "Done! %s: size %s GiB, disks: %s" % (vgname, size,
803 raise OperationalError("Although everything seemed ok, the volume"
804 " group did not get created.")
808 """Application entry point.
810 This is just a wrapper over BootStrap, to handle our own exceptions.
815 except PrereqError, err:
816 print >> sys.stderr, "The prerequisites for running this tool are not met."
817 print >> sys.stderr, ("Please make sure you followed all the steps in"
818 " the build document.")
819 print >> sys.stderr, "Description: %s" % str(err)
821 except SysconfigError, err:
822 print >> sys.stderr, ("This system's configuration seems wrong, at"
823 " least is not what I expect.")
824 print >> sys.stderr, ("Please check that the installation didn't fail"
826 print >> sys.stderr, "Description: %s" % str(err)
828 except ParameterError, err:
829 print >> sys.stderr, ("Some parameters you gave to the program or the"
830 " invocation is wrong. ")
831 print >> sys.stderr, "Description: %s" % str(err)
833 except OperationalError, err:
834 print >> sys.stderr, ("A serious error has happened while modifying"
835 " the system's configuration.")
836 print >> sys.stderr, ("Please review the error message below and make"
837 " sure you clean up yourself.")
838 print >> sys.stderr, ("It is most likely that the system configuration"
839 " has been partially altered.")
840 print >> sys.stderr, str(err)
842 except ProgrammingError, err:
843 print >> sys.stderr, ("Internal application error. Please signal this"
844 " to xencluster-team.")
845 print >> sys.stderr, "Error description: %s" % str(err)
848 print >> sys.stderr, "Unhandled application error: %s" % err
850 except (IOError, OSError), err:
851 print >> sys.stderr, "I/O error detected, please report."
852 print >> sys.stderr, "Description: %s" % str(err)
856 if __name__ == "__main__":