4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Program which configures LVM on the Ganeti nodes.
24 This program wipes disks and creates a volume group on top of them. It
25 can also show disk information to help you decide which disks you want
28 The error handling is done by raising our own exceptions from most of
29 the functions; these exceptions are then handled globally in the main()
30 function. The exceptions that each function can raise are not
31 documented individually, since almost every error path ends in a
34 Another two exceptions that are handled globally are IOError and
35 OSError. The idea behind this is, since we run as root, we should
36 usually not get these errors, but if we do it's most probably a system
37 error, so they should be handled and the user instructed to report
47 from ganeti.utils import RunCmd
48 from ganeti import constants
50 USAGE = ("\tlvmstrap.py diskinfo\n"
51 "\tlvmstrap.py [--vgname=NAME] { --alldisks | --disks DISKLIST }"
class Error(Exception):
    """Generic exception; base class of every error raised by this program."""
class ProgrammingError(Error):
    """Exception denoting invalid assumptions in programming.

    This should catch sysfs tree changes, or otherwise incorrect
    assumptions about the contents of the /sys/block/... directories.

    """
class SysconfigError(Error):
    """Exception denoting invalid system configuration.

    If the system configuration is somehow wrong (e.g. /dev files
    missing, or having mismatched major/minor numbers relative to
    /sys/block devices), this exception will be raised.

    This should usually mean that something went wrong with the
    installation of the Xen node.

    """
class PrereqError(Error):
    """Exception denoting invalid prerequisites.

    If the node does not meet the requirements for cluster membership,
    this exception will be raised. Things like wrong kernel version, or
    no free disks, etc. belong here.

    This should usually mean that the build steps for the Xen node were
    not followed correctly.

    """
class OperationalError(Error):
    """Exception denoting actual errors.

    Errors during the bootstrapping are signaled using this exception.

    """
class ParameterError(Error):
    """Exception denoting invalid input from user.

    Wrong disks given as parameters will be signaled using this
    exception.

    """
114 """Shows program usage information and exits the program."""
116 print >> sys.stderr, "Usage:"
117 print >> sys.stderr, USAGE
122 """Parses the command line options.
124 In case of command line errors, it will show the usage and exit the
128 (options, args), as returned by OptionParser.parse_args
132 parser = optparse.OptionParser(usage="\n%s" % USAGE,
133 version="%%prog (ganeti) %s" %
134 constants.RELEASE_VERSION)
136 parser.add_option("--alldisks", dest="alldisks",
137 help="erase ALL disks", action="store_true",
139 parser.add_option("-d", "--disks", dest="disks",
140 help="Choose disks (e.g. hda,hdg)",
142 parser.add_option("-v", "--verbose",
143 action="store_true", dest="verbose", default=False,
144 help="print command execution messages to stdout")
145 parser.add_option("-g", "--vg-name", type="string",
146 dest="vgname", default="xenvg", metavar="NAME",
147 help="the volume group to be created [default: xenvg]")
150 options, args = parser.parse_args()
154 verbose_flag = options.verbose
def ExecCommand(command):
    """Executes a command, echoing it when running verbosely.

    This is a thin wrapper around RunCmd, with the difference that if
    the command line argument -v has been given, it will print the
    command line and the command output on stdout.

    Args:
      command: the command line to run, as a string

    Returns:
      the result object returned by RunCmd; callers in this file read
      its failed, stdout, stderr and output attributes

    """
    # verbose_flag is only assigned while the options are parsed, so a
    # missing global is treated as "not verbose" rather than a NameError.
    verbose = globals().get("verbose_flag", False)
    if verbose:
        print(command)
    result = RunCmd(command)
    if verbose:
        print(result.output)
    return result
182 """Check the prerequisites of this program.
184 It checks that it runs on Linux 2.6, and that /sys is mounted and the
185 fact that /sys/block is a directory.
189 raise PrereqError("This tool runs as root only. Really.")
191 osname, nodename, release, version, arch = os.uname()
192 if osname != 'Linux':
193 raise PrereqError("This tool only runs on Linux "
194 "(detected OS: %s)." % osname)
196 if not release.startswith("2.6."):
197 raise PrereqError("Wrong major kernel version (detected %s, needs "
200 if not os.path.ismount("/sys"):
201 raise PrereqError("Can't find a filesystem mounted at /sys. "
202 "Please mount /sys.")
204 if not os.path.isdir("/sys/block"):
205 raise SysconfigError("Can't find /sys/block directory. Has the "
206 "layout of /sys changed?")
208 if not os.path.ismount("/proc"):
209 raise PrereqError("Can't find a filesystem mounted at /proc. "
210 "Please mount /proc.")
212 if not os.path.exists("/proc/mounts"):
213 raise SysconfigError("Can't find /proc/mounts")
def CheckVGExists(vgname):
    """Checks to see if a volume group exists.

    Args:
      vgname: the volume group name

    Returns:
      a four-tuple (exists, lv_count, vg_size, vg_free), where:
        exists: True if the vgs command succeeded for this volume group,
          otherwise False; if False, all other members of the tuple
          are None
        lv_count: the number of logical volumes in the volume group
        vg_size: the total size of the volume group (in gibibytes)
        vg_free: the available space in the volume group
      The last three members are the raw strings printed by vgs; they
      are not converted to numbers.

    Raises:
      PrereqError: if vgs succeeded but its output does not consist of
        exactly three whitespace-separated fields

    """
    result = ExecCommand("vgs --nohead -o lv_count,vg_size,"
                         "vg_free --nosuffix --units g "
                         "--ignorelockingfailure %s" % vgname)
    if result.failed:
        lv_count = vg_size = vg_free = None
    else:
        try:
            lv_count, vg_size, vg_free = result.stdout.strip().split()
        except ValueError:
            # This means the output of vgs can't be parsed
            raise PrereqError("cannot parse output of vgs (%s)" %
                              result.stdout)

    return not result.failed, lv_count, vg_size, vg_free
def CheckSysDev(name, devnum):
    """Checks consistency between /sys and /dev trees.

    In /sys/block/<name>/dev and /sys/block/<name>/<part>/dev are the
    kernel-known device numbers. The /dev/<name> block/char devices are
    created by userspace and thus could differ from the kernel
    view. This function checks the consistency between the device number
    read from /sys and the actual device number in /dev.

    Note that since the system could be using udev which removes and
    recreates the device nodes on partition table rescan, we need to do
    some retries here. Since we only do a stat, we can afford to do many
    short retries.

    Args:
      name: the device name, e.g. 'sda'
      devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3

    Returns:
      None; failure of the check is signalled by raising a
      SysconfigError exception

    Raises:
      SysconfigError: if /dev/<name> never shows up, or if its device
        number differs from devnum

    """
    # Imported locally so this function does not depend on the file's
    # import block, which is not fully visible here.
    import time

    path = "/dev/%s" % name
    for _ in range(40):
        if os.path.exists(path):
            break
        # NOTE(review): the pause between retries is not visible in this
        # listing; a quarter of a second is assumed - confirm.
        time.sleep(0.25)
    else:
        # The loop ran out of retries without ever seeing the node.
        raise SysconfigError("the device file %s does not exist, but the block "
                             "device exists in the /sys/block tree" % path)

    rdev = os.stat(path).st_rdev
    if devnum != rdev:
        # Both numbers are printed in hex so they can be compared directly.
        raise SysconfigError("For device %s, the major:minor in /dev is %04x "
                             "while the major:minor in sysfs is %04x" %
                             (path, rdev, devnum))
def ReadDev(syspath):
    """Reads the device number from a sysfs path.

    The device number is given in sysfs under a block device directory
    in a file named 'dev' which contains major:minor (in ASCII). This
    function reads that file and converts the major:minor pair to a dev
    number.

    Args:
      syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

    Returns:
      the device number, as computed by os.makedev

    Raises:
      ProgrammingError: if syspath has no 'dev' file, or if its contents
        are not of the form major:minor

    """
    devfile = "%s/dev" % syspath
    if not os.path.exists(devfile):
        raise ProgrammingError("Invalid path passed to ReadDev: %s" % syspath)

    f = open(devfile)
    try:
        data = f.read().strip()
    finally:
        # Close the file even if the read fails.
        f.close()

    try:
        # os.makedev needs integers; a missing ':' also ends up here
        # because the unpacking then fails with ValueError.
        major, minor = [int(part) for part in data.split(":", 1)]
    except ValueError:
        raise ProgrammingError("Cannot parse device number '%s' read from %s" %
                               (data, devfile))

    return os.makedev(major, minor)
def ReadSize(syspath):
    """Reads the size from a sysfs path.

    The size is given in sysfs under a block device directory in a file
    named 'size' which contains the number of sectors (in ASCII). This
    function reads that file and converts the number in sectors to the
    size in bytes, counting 512 bytes per sector.

    Args:
      syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

    Returns:
      the device size in bytes

    Raises:
      ProgrammingError: if syspath has no 'size' file, or if its contents
        are not an integer

    """
    sizefile = "%s/size" % syspath
    if not os.path.exists(sizefile):
        raise ProgrammingError("Invalid path passed to ReadSize: %s" % syspath)

    f = open(sizefile)
    try:
        data = f.read().strip()
    finally:
        # Close the file even if the read fails.
        f.close()

    try:
        sectors = int(data)
    except ValueError:
        raise ProgrammingError("Cannot parse size '%s' read from %s" %
                               (data, sizefile))

    # Plain integers are promoted to long automatically when needed, so
    # the explicit long literal is not required.
    return 512 * sectors
336 """Reads physical volume information.
338 This function tries to see if a block device is a physical volume.
341 dev: the device name (e.g. sda)
343 The name of the volume group to which this PV belongs, or
344 "" if this PV is not in use, or
345 None if this is not a PV
348 result = ExecCommand("pvdisplay -c /dev/%s" % name)
351 vgname = result.stdout.strip().split(":")[1]
356 """Computes the block device list for this system.
358 This function examines the /sys/block tree and using information
359 therein, computes the status of the block device.
362 [(name, size, dev, partitions, inuse), ...]
364 name is the block device name (e.g. sda)
365 size the size in bytes
366 dev the device number (e.g. 8704 for hdg)
367 partitions is [(name, size, dev), ...] mirroring the disk list data
368 inuse is a boolean showing the in-use status of the disk, computed as the
369 possibility of re-reading the partition table (the meaning of the
370 operation varies with the kernel version, but is usually accurate;
371 a mounted disk/partition or swap-area or PV with active LVs on it
376 for name in os.listdir("/sys/block"):
377 if (not name.startswith("hd") and
378 not name.startswith("sd") and
379 not name.startswith("ubd")):
382 size = ReadSize("/sys/block/%s" % name)
384 f = open("/sys/block/%s/removable" % name)
385 removable = int(f.read().strip())
391 dev = ReadDev("/sys/block/%s" % name)
392 CheckSysDev(name, dev)
393 inuse = not CheckReread(name)
394 # Enumerate partitions of the block device
396 for partname in os.listdir("/sys/block/%s" % name):
397 if not partname.startswith(name):
399 partdev = ReadDev("/sys/block/%s/%s" % (name, partname))
400 partsize = ReadSize("/sys/block/%s/%s" % (name, partname))
401 CheckSysDev(partname, partdev)
402 partitions.append((partname, partsize, partdev))
404 dlist.append((name, size, dev, partitions, inuse))
410 """Reads /proc/mounts and computes the mountpoint-devnum mapping.
412 This function reads /proc/mounts, finds the mounted filesystems
413 (excepting a hard-coded blacklist of network and virtual
414 filesystems) and does a stat on these mountpoints. The st_dev number
415 of the results is memorised for later matching against the
419 a mountpoint: device number dictionary
422 f = open("/proc/mounts", "r")
423 mountlines = f.readlines()
426 for line in mountlines:
427 device, mountpoint, fstype, rest = line.split(None, 3)
429 if fstype in ["nfs", "nfs4", "autofs", "tmpfs", "proc", "sysfs"]:
432 dev = os.stat(mountpoint).st_dev
434 # this should be a fairly rare error, since we are blacklisting
435 # network filesystems; with this in mind, we'll ignore it,
436 # since the rereadpt check catches in-use filesystems,
437 # and this is used for disk information only
438 print >> sys.stderr, ("Can't stat mountpoint '%s': %s" %
440 print >> sys.stderr, "Ignoring."
442 mounts[dev] = mountpoint
def DevInfo(name, dev, mountinfo):
    """Computes miscellaneous information about a block device.

    Args:
      name: the device name, e.g. sda
      dev: the device number, used as the lookup key into mountinfo
      mountinfo: a dictionary mapping device numbers to mount paths

    Returns:
      (mpath, whatvg, fileinfo), where
        mpath is the mount path where this device is mounted or None
        whatvg is the result of the ReadPV function
        fileinfo is the first 45 characters of the output of file -bs
          on the device, or an "<error: ...>" string carrying the
          command's stderr if file failed

    """
    mpath = mountinfo.get(dev)
    whatvg = ReadPV(name)

    result = ExecCommand("file -bs /dev/%s" % name)
    if result.failed:
        fileinfo = "<error: %s>" % result.stderr
    else:
        # Only use stdout on success; previously this assignment ran
        # unconditionally and discarded the error text above.
        fileinfo = result.stdout[:45]
    return mpath, whatvg, fileinfo
474 """Shows a nicely formatted block device list for this system.
476 This function shows the user a table with the information gathered
477 by the other functions defined, in order to help the user make a
478 choice about which disks should be allocated to our volume group.
481 mounts = GetMountInfo()
482 dlist = GetDiskList()
484 print "------- Disk information -------"
485 print ("%5s %7s %4s %5s %-10s %s" %
486 ("Name", "Size[M]", "Used", "Mount", "LVM?", "Info"))
489 # Flatten the [(disk, [partition,...]), ...] list
490 for name, size, dev, parts, inuse in dlist:
495 flatlist.append((name, size, dev, str_inuse))
496 for partname, partsize, partdev in parts:
497 flatlist.append((partname, partsize, partdev, ""))
499 for name, size, dev, in_use in flatlist:
500 mp, vgname, fileinfo = DevInfo(name, dev, mounts)
508 lvminfo = "in %s" % vgname
513 print ("%-5s %7.2f %-4s %-5s %-10s %s" %
514 (name, float(size) / 1024 / 1024, in_use, mp, lvminfo, fileinfo))
def CheckReread(name):
    """Checks whether a block device's partition table can be re-read.

    Uses blockdev to reread the partition table of a block device, and
    thus compute the in-use status. See the discussion in GetDiskList
    about the meaning of 'in use'.

    Args:
      name: the device name, e.g. sda

    Returns:
      boolean: True if the partition table could be re-read (the device
      is considered NOT in use), False if all attempts failed (the
      device is considered in use). Callers negate this value to get
      the in-use status.

    """
    # Imported locally so this function does not depend on the file's
    # import block, which is not fully visible here.
    import time

    for _ in range(3):
        result = ExecCommand("blockdev --rereadpt /dev/%s" % name)
        if not result.failed:
            break
        # NOTE(review): the pause between attempts is not visible in this
        # listing; two seconds is assumed - confirm.
        time.sleep(2)

    return not result.failed
538 """Wipes a block device.
540 This function wipes a block device, by clearing and re-reading the
541 partition table. If not successful, it writes back the old partition
542 data, and leaves the cleanup to the user.
545 the device name (e.g. sda)
548 if not CheckReread(name):
549 raise OperationalError("CRITICAL: disk %s you selected seems to be in "
550 "use. ABORTING!" % name)
552 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
553 olddata = os.read(fd, 512)
554 if len(olddata) != 512:
555 raise OperationalError("CRITICAL: Can't read partition table information "
556 "from /dev/%s (needed 512 bytes, got %d" %
557 (name, len(olddata)))
560 bytes_written = os.write(fd, newdata)
562 if bytes_written != 512:
563 raise OperationalError("CRITICAL: Can't write partition table information"
564 " to /dev/%s (tried to write 512 bytes, written "
565 "%d. I don't know how to cleanup. Sorry." %
566 (name, bytes_written))
568 if not CheckReread(name):
569 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
570 os.write(fd, olddata)
572 raise OperationalError("CRITICAL: disk %s which I have just wiped cannot "
573 "reread partition table. Most likely, it is "
574 "in use. You have to clean after this yourself. "
575 "I tried to restore the old partition table, "
576 "but I cannot guarantee nothing has broken." %
def PartitionDisk(name):
    """Partitions a disk.

    This function creates a single partition spanning the entire disk,
    by feeding the line ',,8e,' to sfdisk (8e is the partition type
    passed to sfdisk).

    Args:
      name: the device name, e.g. sda

    Raises:
      OperationalError: if the sfdisk command fails

    """
    result = ExecCommand(
        'echo ,,8e, | sfdisk /dev/%s' % name)
    if result.failed:
        raise OperationalError("CRITICAL: disk %s which I have just partitioned "
                               "cannot reread its partition table, or there "
                               "is some other sfdisk error. Likely, it is in "
                               "use. You have to clean this yourself. Error "
                               "message from sfdisk: %s" %
                               (name, result.output))
def CreatePVOnDisk(name):
    """Creates a physical volume on a block device.

    This function creates a physical volume on the first partition of a
    block device (/dev/<name>1), overriding all warnings. So it can wipe
    existing PVs and PVs which are in a VG.

    Args:
      name: the device name, e.g. sda

    Raises:
      OperationalError: if the pvcreate command fails

    """
    result = ExecCommand("pvcreate -yff /dev/%s1 " % name)
    if result.failed:
        raise OperationalError("I cannot create a physical volume on "
                               "partition /dev/%s1. Error message: %s. "
                               "Please clean up yourself." %
                               (name, result.output))
def CreateVG(vgname, disks):
    """Creates the volume group.

    This function creates a volume group named `vgname` on the first
    partition of each of the disks given as parameters. The physical
    extent size is set to 64MB.

    Args:
      vgname: the name of the volume group to create
      disks: a list of disk names, e.g. ['sda','sdb']

    Raises:
      OperationalError: if the vgcreate command fails

    """
    pnames = ["'/dev/%s1'" % disk for disk in disks]
    result = ExecCommand("vgcreate -s 64MB '%s' %s" %
                         (vgname, " ".join(pnames)))
    if result.failed:
        raise OperationalError("I cannot create the volume group %s from "
                               "disks %s. Error message: %s. Please clean up "
                               "yourself." %
                               (vgname, " ".join(disks), result.output))
def ValidateDiskList(options):
    """Validates or computes the disk list for create.

    This function either computes the available disk list (if the user
    gave --alldisks option), or validates the user-given disk list (by
    using the --disks option) such that all given disks are present and
    not in use.

    Args:
      options: the options returned from OptionParser.parse_args

    Returns:
      a list of disk names, e.g. ['sda', 'sdb']

    Raises:
      PrereqError: if the system has no disks at all, or no free ones
      ParameterError: if a requested disk is in use or unknown, or if
        neither --alldisks nor --disks was given

    """
    sysdisks = GetDiskList()
    if not sysdisks:
        raise PrereqError("no disks found (I looked for "
                          "non-removable block devices).")

    # Split the detected disks by their in-use flag.
    sysd_free = []
    sysd_used = []
    for name, _size, _dev, _parts, used in sysdisks:
        if used:
            sysd_used.append(name)
        else:
            sysd_free.append(name)

    if not sysd_free:
        raise PrereqError("no free disks found! (%d in-use disks)" %
                          len(sysd_used))

    if options.alldisks:
        disklist = sysd_free
    elif options.disks:
        disklist = options.disks.split(",")
        for name in disklist:
            if name in sysd_used:
                raise ParameterError("disk %s is in use, cannot wipe!" % name)
            if name not in sysd_free:
                raise ParameterError("cannot find disk %s!" % name)
    else:
        raise ParameterError("Please use either --alldisks or --disks!")

    return disklist
682 """Actual main routine."""
686 options, args = ParseOptions()
687 vgname = options.vgname
688 command = args.pop(0)
689 if command == "diskinfo":
692 if command != "create":
695 exists, lv_count, vg_size, vg_free = CheckVGExists(vgname)
697 raise PrereqError("It seems volume group '%s' already exists:\n"
698 " LV count: %s, size: %s, free: %s." %
699 (vgname, lv_count, vg_size, vg_free))
702 disklist = ValidateDiskList(options)
704 for disk in disklist:
707 for disk in disklist:
709 CreateVG(vgname, disklist)
711 status, lv_count, size, free = CheckVGExists(vgname)
713 print "Done! %s: size %s GiB, disks: %s" % (vgname, size,
716 raise OperationalError("Although everything seemed ok, the volume "
717 "group did not get created.")
721 """application entry point.
723 This is just a wrapper over BootStrap, to handle our own exceptions.
728 except PrereqError, err:
729 print >> sys.stderr, "The prerequisites for running this tool are not met."
730 print >> sys.stderr, ("Please make sure you followed all the steps in "
731 "the build document.")
732 print >> sys.stderr, "Description: %s" % str(err)
734 except SysconfigError, err:
735 print >> sys.stderr, ("This system's configuration seems wrong, at "
736 "least is not what I expect.")
737 print >> sys.stderr, ("Please check that the installation didn't fail "
739 print >> sys.stderr, "Description: %s" % str(err)
741 except ParameterError, err:
742 print >> sys.stderr, ("Some parameters you gave to the program or the "
743 "invocation is wrong. ")
744 print >> sys.stderr, "Description: %s" % str(err)
746 except OperationalError, err:
747 print >> sys.stderr, ("A serious error has happened while modifying "
748 "the system's configuration.")
749 print >> sys.stderr, ("Please review the error message below and make "
750 "sure you clean up yourself.")
751 print >> sys.stderr, ("It is most likely that the system configuration "
752 "has been partially altered.")
753 print >> sys.stderr, str(err)
755 except ProgrammingError, err:
756 print >> sys.stderr, ("Internal application error. Please signal this "
757 "to xencluster-team.")
758 print >> sys.stderr, "Error description: %s" % str(err)
761 print >> sys.stderr, "Unhandled application error: %s" % err
763 except (IOError, OSError), err:
764 print >> sys.stderr, "I/O error detected, please report."
765 print >> sys.stderr, "Description: %s" % str(err)
769 if __name__ == "__main__":