# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
"""Program which configures LVM on the Ganeti nodes.

This program wipes disks and creates a volume group on top of them. It
can also show disk information to help you decide which disks you want
to wipe.

The error handling is done by raising our own exceptions from most of
the functions; these exceptions are then handled globally in the main()
function. The exceptions that each function can raise are not
documented individually, since almost every error path ends in a
raise.

Another two exceptions that are handled globally are IOError and
OSError. The idea behind this is, since we run as root, we should
usually not get these errors, but if we do it's most probably a system
error, so they should be handled and the user instructed to report
them.
"""
import optparse
import os
import sys
import time

from ganeti import constants
from ganeti.utils import RunCmd
# Command-line synopsis, shown by the option parser and on usage errors.
# The volume group option is spelled --vg-name because that is the name
# registered with the option parser; optparse does not accept --vgname.
USAGE = ("\tlvmstrap.py diskinfo\n"
         "\tlvmstrap.py [--vg-name=NAME] { --alldisks | --disks DISKLIST }"
         " create")

# Module-wide verbosity switch; it is set from the -v/--verbose option and
# read whenever an external command is executed.
verbose_flag = False
class Error(Exception):
    """Generic exception.

    Base class of the exception types defined in this program.
    """
class ProgrammingError(Error):
    """Exception denoting invalid assumptions in programming.

    This should catch sysfs tree changes, or otherwise incorrect
    assumptions about the contents of the /sys/block/... directories.
    """
class SysconfigError(Error):
    """Exception denoting invalid system configuration.

    If the system configuration is somehow wrong (e.g. /dev files
    missing, or having mismatched major/minor numbers relative to
    /sys/block devices), this exception will be raised.

    This usually points to a problem with the installation of the
    Xen node.
    """
class PrereqError(Error):
    """Exception denoting invalid prerequisites.

    If the node does not meet the requirements for cluster membership,
    this exception will be raised. Things like wrong kernel version, or
    no free disks, etc. belong here.

    This should usually mean that the build steps for the Xen node were
    not followed correctly.
    """
class OperationalError(Error):
    """Exception denoting actual errors.

    Errors during the bootstrapping are signaled using this exception.
    """
class ParameterError(Error):
    """Exception denoting invalid input from user.

    Wrong disks given as parameters will be signaled using this
    exception.
    """
115 """Shows program usage information and exits the program."""
117 print >> sys.stderr, "Usage:"
118 print >> sys.stderr, USAGE
123 """Parses the command line options.
125 In case of command line errors, it will show the usage and exit the
129 (options, args), as returned by OptionParser.parse_args
133 parser = optparse.OptionParser(usage="\n%s" % USAGE,
134 version="%%prog (ganeti) %s" %
135 constants.RELEASE_VERSION)
137 parser.add_option("--alldisks", dest="alldisks",
138 help="erase ALL disks", action="store_true",
140 parser.add_option("-d", "--disks", dest="disks",
141 help="Choose disks (e.g. hda,hdg)",
143 parser.add_option("-v", "--verbose",
144 action="store_true", dest="verbose", default=False,
145 help="print command execution messages to stdout")
146 parser.add_option("-g", "--vg-name", type="string",
147 dest="vgname", default="xenvg", metavar="NAME",
148 help="the volume group to be created [default: xenvg]")
151 options, args = parser.parse_args()
155 verbose_flag = options.verbose
def ExecCommand(command):
    """Executes a command, echoing it when verbose mode is enabled.

    This is a thin wrapper around RunCmd. If the command line argument
    -v has been given (module-level verbose_flag is true), the command
    line is written to stdout before it runs and the command output is
    written to stdout afterwards.

    Args:
      command: the command line to be executed

    Returns:
      the result object produced by RunCmd; the callers in this file
      read its failed, stdout, stderr and output attributes
    """
    if verbose_flag:
        sys.stdout.write("%s\n" % command)
    result = RunCmd(command)
    if verbose_flag:
        sys.stdout.write("%s\n" % result.output)
    return result
183 """Check the prerequisites of this program.
185 It check that it runs on Linux 2.6, and that /sys is mounted and the
186 fact that /sys/block is a directory.
190 raise PrereqError("This tool runs as root only. Really.")
192 osname, nodename, release, version, arch = os.uname()
193 if osname != 'Linux':
194 raise PrereqError("This tool only runs on Linux"
195 " (detected OS: %s)." % osname)
197 if not release.startswith("2.6."):
198 raise PrereqError("Wrong major kernel version (detected %s, needs"
201 if not os.path.ismount("/sys"):
202 raise PrereqError("Can't find a filesystem mounted at /sys."
203 " Please mount /sys.")
205 if not os.path.isdir("/sys/block"):
206 raise SysconfigError("Can't find /sys/block directory. Has the"
207 " layout of /sys changed?")
209 if not os.path.ismount("/proc"):
210 raise PrereqError("Can't find a filesystem mounted at /proc."
211 " Please mount /proc.")
213 if not os.path.exists("/proc/mounts"):
214 raise SysconfigError("Can't find /proc/mounts")
def CheckVGExists(vgname):
    """Checks to see if a volume group exists.

    Args:
      vgname: the volume group name

    Returns:
      a four-tuple (exists, lv_count, vg_size, vg_free), where:
        exists: True if the vgs command succeeded for this volume
          group, otherwise False; if False, all other members of the
          tuple are None
        lv_count: the number of logical volumes in the volume group
        vg_size: the total size of the volume group (in gibibytes)
        vg_free: the available space in the volume group
      The three values are returned as the strings printed by vgs; they
      are not converted to numbers.

    Raises:
      PrereqError: if vgs succeeded but its output does not consist of
        exactly three whitespace-separated fields
    """
    result = ExecCommand("vgs --nohead -o lv_count,vg_size,vg_free"
                         " --nosuffix --units g"
                         " --ignorelockingfailure %s" % vgname)
    if not result.failed:
        try:
            lv_count, vg_size, vg_free = result.stdout.strip().split()
        except ValueError:
            # Wrong number of fields: the output of vgs can't be parsed
            raise PrereqError("cannot parse output of vgs (%s)" %
                              result.stdout)
    else:
        lv_count = vg_size = vg_free = None

    return not result.failed, lv_count, vg_size, vg_free
def CheckSysDev(name, devnum):
    """Checks consistency between /sys and /dev trees.

    In /sys/block/<name>/dev and /sys/block/<name>/<part>/dev are the
    kernel-known device numbers. The /dev/<name> block/char devices are
    created by userspace and thus could differ from the kernel
    view. This function checks the consistency between the device number
    read from /sys and the actual device number in /dev.

    Note that since the system could be using udev which removes and
    recreates the device nodes on partition table rescan, we need to do
    some retries here. Since we only do a stat, we can afford to do many
    short retries.

    Args:
      name: the device name, e.g. 'sda'
      devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3

    Returns:
      None; failure of the check is signalled by raising a
      SysconfigError exception
    """
    path = "/dev/%s" % name
    # Poll for the device node: up to 40 attempts, a quarter of a second
    # apart, before giving up.
    for _ in range(40):
        if os.path.exists(path):
            break
        time.sleep(0.250)
    else:
        raise SysconfigError("the device file %s does not exist, but the block"
                             " device exists in the /sys/block tree" % path)
    rdev = os.stat(path).st_rdev
    if devnum != rdev:
        # Both numbers are shown in hex so they can be compared directly.
        raise SysconfigError("For device %s, the major:minor in /dev is %04x"
                             " while the major:minor in sysfs is %04x" %
                             (path, rdev, devnum))
def ReadDev(syspath):
    """Reads the device number from a sysfs path.

    The device number is given in sysfs under a block device directory
    in a file named 'dev' which contains major:minor (in ASCII). This
    function reads that file and converts the major:minor pair to a dev
    number.

    Args:
      syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

    Returns:
      the device number, as built by os.makedev

    Raises:
      ProgrammingError: if syspath has no 'dev' file
    """
    devfile = "%s/dev" % syspath
    if not os.path.exists(devfile):
        raise ProgrammingError("Invalid path passed to ReadDev: %s" % syspath)
    f = open(devfile)
    try:
        data = f.read().strip()
    finally:
        # Close the handle even if the read fails.
        f.close()
    major, minor = data.split(":", 1)
    # os.makedev needs integers, the file holds ASCII digits.
    return os.makedev(int(major), int(minor))
def ReadSize(syspath):
    """Reads the size from a sysfs path.

    The size is given in sysfs under a block device directory in a file
    named 'size' which contains the number of sectors (in ASCII). This
    function reads that file and converts the number in sectors to the
    size in bytes, counting 512 bytes per sector.

    Args:
      syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

    Returns:
      the device size in bytes

    Raises:
      ProgrammingError: if syspath has no 'size' file
    """
    sizefile = "%s/size" % syspath
    if not os.path.exists(sizefile):
        raise ProgrammingError("Invalid path passed to ReadSize: %s" % syspath)
    f = open(sizefile)
    try:
        data = f.read().strip()
    finally:
        # Close the handle even if the read fails.
        f.close()
    # Python integers grow as needed, so no explicit long literal is required.
    return 512 * int(data)
337 """Reads physical volume information.
339 This function tries to see if a block device is a physical volume.
342 dev: the device name (e.g. sda)
344 The name of the volume group to which this PV belongs, or
345 "" if this PV is not in use, or
346 None if this is not a PV
349 result = ExecCommand("pvdisplay -c /dev/%s" % name)
352 vgname = result.stdout.strip().split(":")[1]
357 """Computes the block device list for this system.
359 This function examines the /sys/block tree and using information
360 therein, computes the status of the block device.
363 [(name, size, dev, partitions, inuse), ...]
365 name is the block device name (e.g. sda)
366 size the size in bytes
367 dev the device number (e.g. 8704 for hdg)
368 partitions is [(name, size, dev), ...] mirroring the disk list data
369 inuse is a boolean showing the in-use status of the disk, computed as the
370 possibility of re-reading the partition table (the meaning of the
371 operation varies with the kernel version, but is usually accurate;
372 a mounted disk/partition or swap-area or PV with active LVs on it
377 for name in os.listdir("/sys/block"):
378 if (not name.startswith("hd") and
379 not name.startswith("sd") and
380 not name.startswith("ubd")):
383 size = ReadSize("/sys/block/%s" % name)
385 f = open("/sys/block/%s/removable" % name)
386 removable = int(f.read().strip())
392 dev = ReadDev("/sys/block/%s" % name)
393 CheckSysDev(name, dev)
394 inuse = not CheckReread(name)
395 # Enumerate partitions of the block device
397 for partname in os.listdir("/sys/block/%s" % name):
398 if not partname.startswith(name):
400 partdev = ReadDev("/sys/block/%s/%s" % (name, partname))
401 partsize = ReadSize("/sys/block/%s/%s" % (name, partname))
402 CheckSysDev(partname, partdev)
403 partitions.append((partname, partsize, partdev))
405 dlist.append((name, size, dev, partitions, inuse))
411 """Reads /proc/mounts and computes the mountpoint-devnum mapping.
413 This function reads /proc/mounts, finds the mounted filesystems
414 (excepting a hard-coded blacklist of network and virtual
415 filesystems) and does a stat on these mountpoints. The st_dev number
416 of the results is memorised for later matching against the
420 a mountpoint: device number dictionary
423 f = open("/proc/mounts", "r")
424 mountlines = f.readlines()
427 for line in mountlines:
428 device, mountpoint, fstype, rest = line.split(None, 3)
430 if fstype in ["nfs", "nfs4", "autofs", "tmpfs", "proc", "sysfs"]:
433 dev = os.stat(mountpoint).st_dev
435 # this should be a fairly rare error, since we are blacklisting
436 # network filesystems; with this in mind, we'll ignore it,
437 # since the rereadpt check catches in-use filesystems,
438 # and this is used for disk information only
439 print >> sys.stderr, ("Can't stat mountpoint '%s': %s" %
441 print >> sys.stderr, "Ignoring."
443 mounts[dev] = mountpoint
def DevInfo(name, dev, mountinfo):
    """Computes miscellaneous information about a block device.

    Args:
      name: the device name, e.g. sda
      dev: the device number, used as the lookup key in mountinfo
      mountinfo: a dictionary mapping device numbers to mount paths

    Returns:
      (mpath, whatvg, fileinfo), where
        mpath is the mount path where this device is mounted or None
        whatvg is the result of the ReadPV function
        fileinfo is the first 45 characters of the output of file -bs
          on the device, or an "<error: ...>" marker carrying the
          command's stderr if file failed
    """
    # None when the device number is not among the known mounts
    mpath = mountinfo.get(dev)

    whatvg = ReadPV(name)

    result = ExecCommand("file -bs /dev/%s" % name)
    if result.failed:
        fileinfo = "<error: %s>" % result.stderr
    else:
        # Only the success path may use stdout; otherwise the error marker
        # above would be overwritten.
        fileinfo = result.stdout[:45]
    return mpath, whatvg, fileinfo
475 """Shows a nicely formatted block device list for this system.
477 This function shows the user a table with the informations gathered
478 by the other functions defined, in order to help the user make a
479 choice about which disks should be allocated to our volume group.
482 mounts = GetMountInfo()
483 dlist = GetDiskList()
485 print "------- Disk information -------"
486 print ("%5s %7s %4s %5s %-10s %s" %
487 ("Name", "Size[M]", "Used", "Mount", "LVM?", "Info"))
490 # Flatten the [(disk, [partition,...]), ...] list
491 for name, size, dev, parts, inuse in dlist:
496 flatlist.append((name, size, dev, str_inuse))
497 for partname, partsize, partdev in parts:
498 flatlist.append((partname, partsize, partdev, ""))
500 for name, size, dev, in_use in flatlist:
501 mp, vgname, fileinfo = DevInfo(name, dev, mounts)
509 lvminfo = "in %s" % vgname
514 print ("%-5s %7.2f %-4s %-5s %-10s %s" %
515 (name, float(size) / 1024 / 1024, in_use, mp, lvminfo, fileinfo))
def CheckReread(name):
    """Checks whether the partition table of a block device can be re-read.

    Uses blockdev to reread the partition table of a block device; the
    callers use a failed re-read as the sign that the device is in use.
    The command is tried up to three times, pausing after each failure.

    Args:
      name: the device name (e.g. sda)

    Returns:
      boolean: True if the partition table was re-read successfully
      (which callers treat as "not in use"), False if every attempt
      failed
    """
    for _ in range(3):
        result = ExecCommand("blockdev --rereadpt /dev/%s" % name)
        if not result.failed:
            break
        # Give the system a moment before trying again.
        time.sleep(2)

    return not result.failed
539 """Wipes a block device.
541 This function wipes a block device, by clearing and re-reading the
542 partition table. If not successful, it writes back the old partition
543 data, and leaves the cleanup to the user.
546 the device name (e.g. sda)
549 if not CheckReread(name):
550 raise OperationalError("CRITICAL: disk %s you selected seems to be in"
551 " use. ABORTING!" % name)
553 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
554 olddata = os.read(fd, 512)
555 if len(olddata) != 512:
556 raise OperationalError("CRITICAL: Can't read partition table information"
557 " from /dev/%s (needed 512 bytes, got %d" %
558 (name, len(olddata)))
561 bytes_written = os.write(fd, newdata)
563 if bytes_written != 512:
564 raise OperationalError("CRITICAL: Can't write partition table information"
565 " to /dev/%s (tried to write 512 bytes, written"
566 " %d. I don't know how to cleanup. Sorry." %
567 (name, bytes_written))
569 if not CheckReread(name):
570 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
571 os.write(fd, olddata)
573 raise OperationalError("CRITICAL: disk %s which I have just wiped cannot"
574 " reread partition table. Most likely, it is"
575 " in use. You have to clean after this yourself."
576 " I tried to restore the old partition table,"
577 " but I cannot guarantee nothing has broken." %
def PartitionDisk(name):
    """Partitions a disk.

    This function creates a single partition spanning the entire disk,
    by feeding the line ",,8e," to sfdisk (default start, default size,
    partition id 8e).

    Args:
      name: the device name, e.g. sda

    Raises:
      OperationalError: if the sfdisk pipeline fails
    """
    result = ExecCommand('echo ,,8e, | sfdisk /dev/%s' % name)
    if result.failed:
        raise OperationalError("CRITICAL: disk %s which I have just partitioned"
                               " cannot reread its partition table, or there"
                               " is some other sfdisk error. Likely, it is in"
                               " use. You have to clean this yourself. Error"
                               " message from sfdisk: %s" %
                               (name, result.output))
def CreatePVOnDisk(name):
    """Creates a physical volume on a block device.

    This function creates a physical volume on the first partition of a
    block device (/dev/<name>1), overriding all warnings. So it can wipe
    existing PVs and PVs which are in a VG.

    Args:
      name: the device name, e.g. sda

    Raises:
      OperationalError: if pvcreate fails
    """
    result = ExecCommand("pvcreate -yff /dev/%s1 " % name)
    if result.failed:
        raise OperationalError("I cannot create a physical volume on"
                               " partition /dev/%s1. Error message: %s."
                               " Please clean up yourself." %
                               (name, result.output))
def CreateVG(vgname, disks):
    """Creates the volume group.

    This function creates a volume group named `vgname` on the first
    partition of each of the disks given as parameters. The physical
    extent size is set to 64MB.

    Args:
      vgname: the name of the volume group to create
      disks: a list of disk names, e.g. ['sda','sdb']

    Raises:
      OperationalError: if vgcreate fails
    """
    # Each disk contributes its first partition, quoted for the command line
    pnames = ["'/dev/%s1'" % disk for disk in disks]
    result = ExecCommand("vgcreate -s 64MB '%s' %s" %
                         (vgname, " ".join(pnames)))
    if result.failed:
        raise OperationalError("I cannot create the volume group %s from"
                               " disks %s. Error message: %s. Please clean up"
                               " yourself." %
                               (vgname, " ".join(disks), result.output))
def ValidateDiskList(options):
    """Validates or computes the disk list for create.

    This function either computes the available disk list (if the user
    gave --alldisks option), or validates the user-given disk list (by
    using the --disks option) such that all given disks are present and
    not in use.

    Args:
      options: the parsed command line options; the alldisks and disks
        attributes are read

    Returns:
      a list of disk names, e.g. ['sda', 'sdb']

    Raises:
      PrereqError: if the system has no disks, or none that are free
      ParameterError: if a requested disk is in use or unknown, or if
        neither --alldisks nor --disks was given
    """
    sysdisks = GetDiskList()
    if not sysdisks:
        raise PrereqError("no disks found (I looked for"
                          " non-removable block devices).")

    # Split the system disks by their in-use status
    sysd_free = []
    sysd_used = []
    for name, size, dev, part, used in sysdisks:
        if used:
            sysd_used.append(name)
        else:
            sysd_free.append(name)

    if not sysd_free:
        raise PrereqError("no free disks found! (%d in-use disks)" %
                          len(sysd_used))

    if options.alldisks:
        disklist = sysd_free
    elif options.disks:
        disklist = options.disks.split(",")
        for name in disklist:
            if name in sysd_used:
                raise ParameterError("disk %s is in use, cannot wipe!" % name)
            if name not in sysd_free:
                raise ParameterError("cannot find disk %s!" % name)
    else:
        raise ParameterError("Please use either --alldisks or --disks!")

    return disklist
684 """Actual main routine."""
688 options, args = ParseOptions()
689 vgname = options.vgname
690 command = args.pop(0)
691 if command == "diskinfo":
694 if command != "create":
697 exists, lv_count, vg_size, vg_free = CheckVGExists(vgname)
699 raise PrereqError("It seems volume group '%s' already exists:\n"
700 " LV count: %s, size: %s, free: %s." %
701 (vgname, lv_count, vg_size, vg_free))
704 disklist = ValidateDiskList(options)
706 for disk in disklist:
709 for disk in disklist:
711 CreateVG(vgname, disklist)
713 status, lv_count, size, free = CheckVGExists(vgname)
715 print "Done! %s: size %s GiB, disks: %s" % (vgname, size,
718 raise OperationalError("Although everything seemed ok, the volume"
719 " group did not get created.")
723 """application entry point.
725 This is just a wrapper over BootStrap, to handle our own exceptions.
730 except PrereqError, err:
731 print >> sys.stderr, "The prerequisites for running this tool are not met."
732 print >> sys.stderr, ("Please make sure you followed all the steps in"
733 " the build document.")
734 print >> sys.stderr, "Description: %s" % str(err)
736 except SysconfigError, err:
737 print >> sys.stderr, ("This system's configuration seems wrong, at"
738 " least is not what I expect.")
739 print >> sys.stderr, ("Please check that the installation didn't fail"
741 print >> sys.stderr, "Description: %s" % str(err)
743 except ParameterError, err:
744 print >> sys.stderr, ("Some parameters you gave to the program or the"
745 " invocation is wrong. ")
746 print >> sys.stderr, "Description: %s" % str(err)
748 except OperationalError, err:
749 print >> sys.stderr, ("A serious error has happened while modifying"
750 " the system's configuration.")
751 print >> sys.stderr, ("Please review the error message below and make"
752 " sure you clean up yourself.")
753 print >> sys.stderr, ("It is most likely that the system configuration"
754 " has been partially altered.")
755 print >> sys.stderr, str(err)
757 except ProgrammingError, err:
758 print >> sys.stderr, ("Internal application error. Please signal this"
759 " to xencluster-team.")
760 print >> sys.stderr, "Error description: %s" % str(err)
763 print >> sys.stderr, "Unhandled application error: %s" % err
765 except (IOError, OSError), err:
766 print >> sys.stderr, "I/O error detected, please report."
767 print >> sys.stderr, "Description: %s" % str(err)
771 if __name__ == "__main__":