4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
"""Program which configures LVM on the Ganeti nodes.

This program wipes disks and creates a volume group on top of them. It
can also show disk information to help you decide which disks you want
to wipe.

The error handling is done by raising our own exceptions from most of
the functions; these exceptions are then handled globally in the main()
function. The exceptions that each function can raise are not
documented individually, since almost every error path ends in a
raise.

Another two exceptions that are handled globally are IOError and
OSError. The idea behind this is, since we run as root, we should
usually not get these errors, but if we do it's most probably a system
error, so they should be handled and the user instructed to report
them.

"""
import optparse
import os
import re
import sys
import time

from ganeti.utils import RunCmd, ReadFile
from ganeti import constants
from ganeti import cli
# Command line synopsis, handed to optparse as the usage text. The option
# spellings must match the ones registered on the OptionParser: the volume
# group option is "--vg-name" (the synopsis used to advertise "--vgname",
# which optparse rejects as an unknown option).
USAGE = ("\tlvmstrap diskinfo\n"
         "\tlvmstrap [--vg-name=NAME] [--allow-removable]"
         " { --alldisks | --disks DISKLIST }"
         " create")

# Module-wide verbosity switch; overwritten with the parsed --verbose
# option once the command line has been processed.
verbose_flag = False
class Error(Exception):
  """Generic exception; base class of all errors raised by this program."""


class ProgrammingError(Error):
  """Exception denoting invalid assumptions in programming.

  This should catch sysfs tree changes, or otherwise incorrect
  assumptions about the contents of the /sys/block/... directories.

  """


class SysconfigError(Error):
  """Exception denoting invalid system configuration.

  If the system configuration is somehow wrong (e.g. /dev files
  missing, or having mismatched major/minor numbers relative to
  /sys/block devices), this exception will be raised.

  This should usually mean that the installation of the Xen node
  failed in some steps.

  """


class PrereqError(Error):
  """Exception denoting invalid prerequisites.

  If the node does not meet the requirements for cluster membership, this
  exception will be raised. Things like wrong kernel version, or no
  free disks, etc. belong here.

  This should usually mean that the build steps for the Xen node were
  not followed correctly.

  """


class OperationalError(Error):
  """Exception denoting actual errors.

  Errors during the bootstrapping are signaled using this exception.

  """


class ParameterError(Error):
  """Exception denoting invalid input from user.

  Wrong disks given as parameters will be signaled using this
  exception.

  """
def Usage():
  """Shows program usage information and exits the program.

  The synopsis is written to stderr; this function does not return.

  """
  sys.stderr.write("Usage:\n")
  sys.stderr.write("%s\n" % USAGE)
  # NOTE(review): the exit statement was not recoverable from the source;
  # 2 is the customary status for command line usage errors -- confirm
  sys.exit(2)
def ParseOptions():
  """Parses the command line options.

  In case of command line errors, it will show the usage and exit the
  program.

  As a side effect, the module-level C{verbose_flag} is set from the
  parsed verbose option.

  @rtype: tuple
  @return: a tuple of (options, args), as returned by
      OptionParser.parse_args

  """
  global verbose_flag # pylint: disable-msg=W0603

  parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                 version="%%prog (ganeti) %s" %
                                 constants.RELEASE_VERSION)

  parser.add_option("--alldisks", dest="alldisks",
                    help="erase ALL disks", action="store_true",
                    default=False)
  parser.add_option("-d", "--disks", dest="disks",
                    help="Choose disks (e.g. hda,hdg)",
                    metavar="DISKLIST")
  parser.add_option(cli.VERBOSE_OPT)
  parser.add_option("-r", "--allow-removable",
                    action="store_true", dest="removable_ok", default=False,
                    help="allow and use removable devices too")
  parser.add_option("-g", "--vg-name", type="string",
                    dest="vgname", default="xenvg", metavar="NAME",
                    help="the volume group to be created [default: xenvg]")

  options, args = parser.parse_args()
  # The caller pops exactly one positional argument (the command), so
  # anything else is a usage error; parser.error() prints the usage text
  # to stderr and exits with status 2.
  if len(args) != 1:
    parser.error("expected exactly one command (diskinfo or create)")

  verbose_flag = options.verbose

  return options, args
def ExecCommand(command):
  """Executes a command.

  This is just a wrapper around RunCmd, with the difference that if
  the command line argument -v has been given, it will print the
  command line and the command output on stdout.

  @param command: the command line to be executed
  @return: the result object produced by RunCmd; the code in this file
      relies on its C{failed}, C{stdout}, C{stderr} and C{output}
      attributes

  """
  if verbose_flag:
    sys.stdout.write("%s\n" % command)
  result = RunCmd(command)
  if verbose_flag:
    sys.stdout.write("%s\n" % result.output)
  return result
def CheckPrereq():
  """Check the prerequisites of this program.

  It checks that it runs as root on Linux 2.6 or later, that /sys and
  /proc are mounted, that /sys/block is a directory and that
  /proc/mounts exists.

  @raise PrereqError: if the user, OS, kernel version or mounts are not
      suitable
  @raise SysconfigError: if /sys/block or /proc/mounts is missing

  """
  if os.getuid() != 0:
    raise PrereqError("This tool runs as root only. Really.")

  osname, _, release, _, _ = os.uname()
  if osname != "Linux":
    raise PrereqError("This tool only runs on Linux"
                      " (detected OS: %s)." % osname)

  # Compare the numeric major.minor pair instead of matching the "2.6."
  # prefix, so that kernels newer than 2.6 are not rejected.
  kver = re.match(r"(\d+)\.(\d+)", release)
  if kver is None or (int(kver.group(1)), int(kver.group(2))) < (2, 6):
    raise PrereqError("Wrong major kernel version (detected %s, needs"
                      " 2.6 or later)" % release)

  if not os.path.ismount("/sys"):
    raise PrereqError("Can't find a filesystem mounted at /sys."
                      " Please mount /sys.")

  if not os.path.isdir("/sys/block"):
    raise SysconfigError("Can't find /sys/block directory. Has the"
                         " layout of /sys changed?")

  if not os.path.ismount("/proc"):
    raise PrereqError("Can't find a filesystem mounted at /proc."
                      " Please mount /proc.")

  if not os.path.exists("/proc/mounts"):
    raise SysconfigError("Can't find /proc/mounts")
def CheckVGExists(vgname):
  """Checks to see if a volume group exists.

  @param vgname: the volume group name

  @return: a four-tuple (exists, lv_count, vg_size, vg_free), where:
      - exists: True if the volume exists, otherwise False; if False,
        all other members of the tuple are None
      - lv_count: The number of logical volumes in the volume group
      - vg_size: The total size of the volume group (in gibibytes)
      - vg_free: The available space in the volume group

      The last three members are the strings printed by vgs; they are
      not converted to numbers.
  @raise PrereqError: if vgs succeeded but its output does not consist
      of exactly three fields

  """
  result = ExecCommand("vgs --nohead -o lv_count,vg_size,vg_free"
                       " --nosuffix --units g"
                       " --ignorelockingfailure %s" % vgname)
  if result.failed:
    return False, None, None, None

  try:
    lv_count, vg_size, vg_free = result.stdout.strip().split()
  except ValueError:
    # This means the output of vgs can't be parsed
    raise PrereqError("cannot parse output of vgs (%s)" % result.stdout)

  return True, lv_count, vg_size, vg_free
def CheckSysDev(name, devnum):
  """Checks consistency between /sys and /dev trees.

  In /sys/block/<name>/dev and /sys/block/<name>/<part>/dev are the
  kernel-known device numbers. The /dev/<name> block/char devices are
  created by userspace and thus could differ from the kernel
  view. This function checks the consistency between the device number
  read from /sys and the actual device number in /dev.

  Note that since the system could be using udev which removes and
  recreates the device nodes on partition table rescan, we need to do
  some retries here. Since we only do a stat, we can afford to do many
  short retries.

  @param name: the device name, e.g. 'sda'
  @param devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3
  @raises L{SysconfigError}: in case of failure of the check

  """
  path = "/dev/%s" % name
  # NOTE(review): the retry count and delay were not recoverable from the
  # source; 40 polls of 250ms (about ten seconds in total) -- confirm
  for _ in range(40):
    if os.path.exists(path):
      break
    time.sleep(0.250)
  else:
    # the loop ran to completion without ever seeing the device node
    raise SysconfigError("the device file %s does not exist, but the block"
                         " device exists in the /sys/block tree" % path)
  rdev = os.stat(path).st_rdev
  if devnum != rdev:
    # both numbers are shown in hex so that they can be compared directly
    raise SysconfigError("For device %s, the major:minor in /dev is %04x"
                         " while the major:minor in sysfs is %04x" %
                         (path, rdev, devnum))
def ReadDev(syspath):
  """Reads the device number from a sysfs path.

  The device number is given in sysfs under a block device directory
  in a file named 'dev' which contains major:minor (in ASCII). This
  function reads that file and converts the major:minor pair to a dev
  number.

  @type syspath: string
  @param syspath: the path to a block device dir in sysfs,
      e.g. C{/sys/block/sda}
  @rtype: int
  @return: the device number
  @raise ProgrammingError: if C{syspath} contains no 'dev' file

  """
  devfile = "%s/dev" % syspath
  if not os.path.exists(devfile):
    raise ProgrammingError("Invalid path passed to ReadDev: %s" % syspath)
  # the context manager closes the file even if the read fails
  with open(devfile) as fobj:
    data = fobj.read().strip()
  major, minor = data.split(":", 1)
  return os.makedev(int(major), int(minor))
def ReadSize(syspath):
  """Reads the size from a sysfs path.

  The size is given in sysfs under a block device directory in a file
  named 'size' which contains the number of sectors (in ASCII). This
  function reads that file and converts the number in sectors to the
  size in bytes, using 512 bytes per sector.

  @type syspath: string
  @param syspath: the path to a block device dir in sysfs,
      e.g. C{/sys/block/sda}
  @rtype: int
  @return: the device size in bytes
  @raise ProgrammingError: if C{syspath} contains no 'size' file

  """
  sizefile = "%s/size" % syspath
  if not os.path.exists(sizefile):
    raise ProgrammingError("Invalid path passed to ReadSize: %s" % syspath)
  # the context manager closes the file even if the read fails
  with open(sizefile) as fobj:
    data = fobj.read().strip()
  # plain integers are promoted as needed, no explicit long literal required
  return 512 * int(data)
def ReadPV(name):
  """Reads physical volume information.

  This function tries to see if a block device is a physical volume.

  @type name: string
  @param name: the device name (e.g. sda)

  @return: the name of the volume group to which this PV belongs, or
      "" if this PV is not in use, or None if this is not a PV

  """
  result = ExecCommand("pvdisplay -c /dev/%s" % name)
  if result.failed:
    # pvdisplay did not accept the device, so it is not a PV
    return None
  # in the colon-separated output the second field is the VG name
  vgname = result.stdout.strip().split(":")[1]
  return vgname
def GetDiskList(opts):
  """Computes the block device list for this system.

  This function examines the /sys/block tree and using information
  therein, computes the status of the block device.

  @param opts: the parsed command line options; only C{removable_ok}
      is read, to decide whether removable devices are listed

  @return: a list like [(name, size, dev, partitions, inuse), ...], where:
      - name is the block device name (e.g. sda)
      - size the size in bytes
      - dev is the device number (e.g. 8704 for hdg)
      - partitions is [(name, size, dev), ...] mirroring the disk list
        data
      - inuse is a boolean showing the in-use status of the disk,
        computed as the possibility of re-reading the partition table
        (the meaning of the operation varies with the kernel version,
        but is usually accurate; a mounted disk/partition or swap-area
        or PV with active LVs on it is busy)

  """
  dlist = []
  for name in os.listdir("/sys/block"):
    # only IDE (hd*), SCSI-like (sd*) and UML (ubd*) disks are considered
    if not name.startswith(("hd", "sd", "ubd")):
      continue

    size = ReadSize("/sys/block/%s" % name)

    with open("/sys/block/%s/removable" % name) as fobj:
      removable = int(fobj.read().strip())

    if removable and not opts.removable_ok:
      continue

    dev = ReadDev("/sys/block/%s" % name)
    CheckSysDev(name, dev)
    inuse = not CheckReread(name)
    # Enumerate partitions of the block device
    partitions = []
    for partname in os.listdir("/sys/block/%s" % name):
      # partition directories are named after their disk (sda -> sda1)
      if not partname.startswith(name):
        continue
      partdev = ReadDev("/sys/block/%s/%s" % (name, partname))
      partsize = ReadSize("/sys/block/%s/%s" % (name, partname))
      CheckSysDev(partname, partdev)
      partitions.append((partname, partsize, partdev))
    # os.listdir() returns entries in arbitrary order; sort so that the
    # result is stable between runs
    partitions.sort()
    dlist.append((name, size, dev, partitions, inuse))
  dlist.sort()
  return dlist
def GetMountInfo():
  """Reads /proc/mounts and computes the mountpoint-devnum mapping.

  This function reads /proc/mounts, finds the mounted filesystems
  (excepting a hard-coded blacklist of network and virtual
  filesystems) and does a stat on these mountpoints. The st_dev number
  of the results is memorised for later matching against the
  device numbers of the block devices.

  @rtype: dict
  @return: a {device number: mountpoint} dictionary

  """
  mountlines = ReadFile("/proc/mounts").splitlines()
  mounts = {}
  for line in mountlines:
    # fields are: device, mountpoint, fstype, rest of the line
    _, mountpoint, fstype, _ = line.split(None, 3)
    # fs type blacklist
    if fstype in ["nfs", "nfs4", "autofs", "tmpfs", "proc", "sysfs"]:
      continue
    try:
      dev = os.stat(mountpoint).st_dev
    except OSError as err:
      # this should be a fairly rare error, since we are blacklisting
      # network filesystems; with this in mind, we'll ignore it,
      # since the rereadpt check catches in-use filesystems,
      # and this is used for disk information only
      sys.stderr.write("Can't stat mountpoint '%s': %s\n" %
                       (mountpoint, err))
      sys.stderr.write("Ignoring.\n")
      continue
    mounts[dev] = mountpoint
  return mounts
def DevInfo(name, dev, mountinfo):
  """Computes miscellaneous information about a block device.

  @type name: string
  @param name: the device name, e.g. sda
  @param dev: the device number of the device, used as lookup key in
      C{mountinfo}
  @param mountinfo: a {device number: mountpoint} dictionary

  @return: a tuple (mpath, whatvg, fileinfo), where:
      - mpath is the mount path where this device is mounted or None
      - whatvg is the result of the ReadPV function
      - fileinfo is the output of file -bs on the device (truncated to
        45 characters), or an error marker if the command failed

  """
  mpath = mountinfo.get(dev)

  whatvg = ReadPV(name)

  result = ExecCommand("file -bs /dev/%s" % name)
  if result.failed:
    fileinfo = "<error: %s>" % result.stderr
  else:
    # the error marker above used to be overwritten unconditionally by
    # this assignment, hiding failures of the file command
    fileinfo = result.stdout[:45]
  return mpath, whatvg, fileinfo
def ShowDiskInfo(opts):
  """Shows a nicely formatted block device list for this system.

  This function shows the user a table with the information gathered
  by the other functions defined, in order to help the user make a
  choice about which disks should be allocated to our volume group.

  @param opts: the parsed command line options, passed on to GetDiskList

  """
  mounts = GetMountInfo()
  dlist = GetDiskList(opts)

  sys.stdout.write("------- Disk information -------\n")
  # NOTE(review): the column titles were not recoverable from the source;
  # only the field keys below are -- confirm the wording
  headers = {
    "name": "Name",
    "size": "Size[M]",
    "used": "Used",
    "mount": "Mount",
    "lvm": "LVM?",
    "info": "Info",
    }
  fields = ["name", "size", "used", "mount", "lvm", "info"]

  # Flatten the [(disk, [partition,...]), ...] list; each entry carries
  # the label to display next to the real device name, so partitions can
  # be shown indented under their disk. Partitions have no in-use
  # marker of their own.
  flatlist = []
  for name, size, dev, parts, inuse in dlist:
    if inuse:
      str_inuse = "yes"
    else:
      str_inuse = "no"
    flatlist.append((name, name, size, dev, str_inuse))
    for partname, partsize, partdev in parts:
      flatlist.append((" %s" % partname, partname, partsize, partdev, ""))

  strlist = []
  for label, name, size, dev, in_use in flatlist:
    mp, vgname, fileinfo = DevInfo(name, dev, mounts)
    # NOTE(review): the placeholder texts for "not mounted", "not a PV"
    # and "unused PV" were not recoverable from the source -- confirm
    if mp is None:
      mp = "-"
    if vgname is None:
      lvminfo = "-"
    elif vgname == "":
      lvminfo = "yes,free"
    else:
      lvminfo = "in %s" % vgname

    # sizes are kept in bytes internally and displayed in mebibytes
    strlist.append([label, "%.2f" % (float(size) / 1024 / 1024),
                    in_use, mp, lvminfo, fileinfo])

  # presumably GenerateTable returns the formatted table as a sequence of
  # lines -- verify against ganeti.cli
  data = cli.GenerateTable(headers, fields, None,
                           strlist, numfields=["size"])
  for line in data:
    sys.stdout.write("%s\n" % line)
def CheckReread(name):
  """Check to see if a block device is in use.

  Uses blockdev to reread the partition table of a block device, and
  thus compute the in-use status. See the discussion in GetDiskList
  about the meaning of 'in use'.

  @type name: string
  @param name: the device name (e.g. sda)
  @rtype: boolean
  @return: True if the partition table could be re-read (i.e. the
      device is NOT in use), False otherwise

  """
  # NOTE(review): the retry count and delay were not recoverable from the
  # source; three attempts, two seconds apart -- confirm
  for _ in range(3):
    result = ExecCommand("blockdev --rereadpt /dev/%s" % name)
    if not result.failed:
      break
    time.sleep(2)

  return not result.failed
def WipeDisk(name):
  """Wipes a block device.

  This function wipes a block device, by clearing and re-reading the
  partition table. If not successful, it writes back the old partition
  data, and leaves the cleanup to the user.

  @type name: string
  @param name: the device name (e.g. sda)
  @raise OperationalError: if the disk is in use, or the first sector
      cannot be read or written completely

  """
  if not CheckReread(name):
    raise OperationalError("CRITICAL: disk %s you selected seems to be in"
                           " use. ABORTING!" % name)

  fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
  try:
    # keep a copy of the first sector so it can be restored on failure
    olddata = os.read(fd, 512)
    if len(olddata) != 512:
      raise OperationalError("CRITICAL: Can't read partition table information"
                             " from /dev/%s (needed 512 bytes, got %d)" %
                             (name, len(olddata)))
    newdata = b"\0" * 512
    # the read above moved the file offset; go back to the first sector
    os.lseek(fd, 0, 0)
    bytes_written = os.write(fd, newdata)
  finally:
    # never leak the descriptor, whatever happened above
    os.close(fd)

  if bytes_written != 512:
    raise OperationalError("CRITICAL: Can't write partition table information"
                           " to /dev/%s (tried to write 512 bytes, written"
                           " %d). I don't know how to cleanup. Sorry." %
                           (name, bytes_written))

  if not CheckReread(name):
    # best-effort restore of the saved sector before giving up
    fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
    try:
      os.write(fd, olddata)
    finally:
      os.close(fd)
    raise OperationalError("CRITICAL: disk %s which I have just wiped cannot"
                           " reread partition table. Most likely, it is"
                           " in use. You have to clean after this yourself."
                           " I tried to restore the old partition table,"
                           " but I cannot guarantee nothing has broken." %
                           name)
def PartitionDisk(name):
  """Partitions a disk.

  This function creates a single partition spanning the entire disk,
  by feeding sfdisk one entry with default start and size and the
  partition type 8e.

  @type name: string
  @param name: the device name, e.g. sda
  @raise OperationalError: if sfdisk reports a failure

  """
  result = ExecCommand(
    'echo ,,8e, | sfdisk /dev/%s' % name)
  if result.failed:
    raise OperationalError("CRITICAL: disk %s which I have just partitioned"
                           " cannot reread its partition table, or there"
                           " is some other sfdisk error. Likely, it is in"
                           " use. You have to clean this yourself. Error"
                           " message from sfdisk: %s" %
                           (name, result.output))
def CreatePVOnDisk(name):
  """Creates a physical volume on a block device.

  This function creates a physical volume on the first partition of a
  block device, overriding all warnings. So it can wipe existing PVs
  and PVs which are in a VG.

  @type name: string
  @param name: the device name, e.g. sda
  @raise OperationalError: if pvcreate reports a failure

  """
  result = ExecCommand("pvcreate -yff /dev/%s1 " % name)
  if result.failed:
    raise OperationalError("I cannot create a physical volume on"
                           " partition /dev/%s1. Error message: %s."
                           " Please clean up yourself." %
                           (name, result.output))
def CreateVG(vgname, disks):
  """Creates the volume group.

  This function creates a volume group named `vgname` on the first
  partition of each of the disks given as parameters. The physical
  extent size is set to 64MB.

  @type vgname: string
  @param vgname: the name of the volume group to create
  @type disks: list
  @param disks: a list of disk names, e.g. ['sda','sdb']
  @raise OperationalError: if vgcreate reports a failure

  """
  pnames = ["'/dev/%s1'" % disk for disk in disks]
  result = ExecCommand("vgcreate -s 64MB '%s' %s" % (vgname, " ".join(pnames)))
  if result.failed:
    raise OperationalError("I cannot create the volume group %s from"
                           " disks %s. Error message: %s. Please clean up"
                           " yourself." %
                           (vgname, " ".join(disks), result.output))
def ValidateDiskList(options):
  """Validates or computes the disk list for create.

  This function either computes the available disk list (if the user
  gave --alldisks option), or validates the user-given disk list (by
  using the --disks option) such that all given disks are present and
  not in use.

  @param options: the options returned from OptParser.parse_options
  @rtype: list
  @return: a list of disk names, e.g. ['sda', 'sdb']
  @raise PrereqError: if the system has no disks, or no free ones
  @raise ParameterError: if a requested disk is in use or unknown, or
      if neither --alldisks nor --disks was given

  """
  sysdisks = GetDiskList(options)
  if not sysdisks:
    raise PrereqError("no disks found (I looked for"
                      " non-removable block devices).")

  # split the detected disks by their in-use status
  sysd_free = []
  sysd_used = []
  for name, _, _, _, used in sysdisks:
    if used:
      sysd_used.append(name)
    else:
      sysd_free.append(name)

  if not sysd_free:
    raise PrereqError("no free disks found! (%d in-use disks)" %
                      len(sysd_used))

  if options.alldisks:
    disklist = sysd_free
  elif options.disks:
    disklist = options.disks.split(",")
    for name in disklist:
      if name in sysd_used:
        raise ParameterError("disk %s is in use, cannot wipe!" % name)
      if name not in sysd_free:
        raise ParameterError("cannot find disk %s!" % name)
  else:
    raise ParameterError("Please use either --alldisks or --disks!")

  return disklist
def BootStrap():
  """Actual main routine.

  Checks the prerequisites, parses the command line and then either
  shows the disk information (C{diskinfo}) or wipes and partitions the
  selected disks and builds the volume group on them (C{create}).

  """
  CheckPrereq()

  options, args = ParseOptions()
  vgname = options.vgname
  command = args.pop(0)
  if command == "diskinfo":
    ShowDiskInfo(options)
    return
  if command != "create":
    Usage()

  exists, lv_count, vg_size, vg_free = CheckVGExists(vgname)
  if exists:
    raise PrereqError("It seems volume group '%s' already exists:\n"
                      "  LV count: %s, size: %s, free: %s." %
                      (vgname, lv_count, vg_size, vg_free))

  disklist = ValidateDiskList(options)

  # all disks are wiped and partitioned before the first PV is created
  for disk in disklist:
    WipeDisk(disk)
    PartitionDisk(disk)
  for disk in disklist:
    CreatePVOnDisk(disk)
  CreateVG(vgname, disklist)

  # verify the result instead of trusting the exit codes alone
  status, lv_count, size, _ = CheckVGExists(vgname)
  if status:
    sys.stdout.write("Done! %s: size %s GiB, disks: %s\n" %
                     (vgname, size, ",".join(disklist)))
  else:
    raise OperationalError("Although everything seemed ok, the volume"
                           " group did not get created.")
def _ExitWithMessages(lines):
  """Writes each of the given lines to stderr and exits with status 1.

  """
  for line in lines:
    sys.stderr.write("%s\n" % line)
  # NOTE(review): the exit statements were not recoverable from the
  # source; a uniform failure status of 1 is used -- confirm
  sys.exit(1)


def main():
  """Application entry point.

  This is just a wrapper over BootStrap, to handle our own exceptions.
  Each known error class is reported on stderr with a hint suited to
  it, after which the program exits with a non-zero status.

  """
  try:
    BootStrap()
  except PrereqError as err:
    _ExitWithMessages([
      "The prerequisites for running this tool are not met.",
      ("Please make sure you followed all the steps in"
       " the build document."),
      "Description: %s" % str(err),
      ])
  except SysconfigError as err:
    _ExitWithMessages([
      ("This system's configuration seems wrong, at"
       " least is not what I expect."),
      ("Please check that the installation didn't fail"
       " at some step."),
      "Description: %s" % str(err),
      ])
  except ParameterError as err:
    _ExitWithMessages([
      ("Some parameters you gave to the program or the"
       " invocation is wrong. "),
      "Description: %s" % str(err),
      ])
  except OperationalError as err:
    _ExitWithMessages([
      ("A serious error has happened while modifying"
       " the system's configuration."),
      ("Please review the error message below and make"
       " sure you clean up yourself."),
      ("It is most likely that the system configuration"
       " has been partially altered."),
      str(err),
      ])
  except ProgrammingError as err:
    _ExitWithMessages([
      ("Internal application error. Please signal this"
       " to xencluster-team."),
      "Error description: %s" % str(err),
      ])
  except Error as err:
    # must come after the specific subclasses handled above
    _ExitWithMessages([
      "Unhandled application error: %s" % err,
      ])
  except (IOError, OSError) as err:
    _ExitWithMessages([
      "I/O error detected, please report.",
      "Description: %s" % str(err),
      ])
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
  main()