4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Program which configures LVM on the Ganeti nodes.
24 This program wipes disks and creates a volume group on top of them. It
25 can also show disk information to help you decide which disks you want
28 The error handling is done by raising our own exceptions from most of
29 the functions; these exceptions are then handled globally in the main()
30 function. The exceptions that each function can raise are not
31 documented individually, since almost every error path ends in a
34 Another two exceptions that are handled globally are IOError and
35 OSError. The idea behind this is, since we run as root, we should
36 usually not get these errors, but if we do it's most probably a system
37 error, so they should be handled and the user instructed to report
47 from ganeti.utils import RunCmd
48 from ganeti import constants
49 from ganeti import cli
51 USAGE = ("\tlvmstrap diskinfo\n"
52 "\tlvmstrap [--vgname=NAME] [--allow-removable]"
53 " { --alldisks | --disks DISKLIST }"
class Error(Exception):
    """Base class for all exceptions defined by this program.

    The more specific exception classes in this file derive from it, so
    catching Error catches any of them.
    """
class ProgrammingError(Error):
    """Exception denoting invalid assumptions in programming.

    This should catch sysfs tree changes, or otherwise incorrect
    assumptions about the contents of the /sys/block/... directories.
    """
class SysconfigError(Error):
    """Exception denoting invalid system configuration.

    If the system configuration is somehow wrong (e.g. /dev files
    missing, or having mismatched major/minor numbers relative to
    /sys/block devices), this exception will be raised.

    This should usually mean that the installation of the Xen node
    was not completed correctly.
    """
class PrereqError(Error):
    """Exception denoting invalid prerequisites.

    If the node does not meet the requirements for cluster membership,
    this exception will be raised. Things like wrong kernel version, or
    no free disks, etc. belong here.

    This should usually mean that the build steps for the Xen node were
    not followed correctly.
    """
class OperationalError(Error):
    """Exception denoting actual errors.

    Errors during the bootstrapping are signaled using this exception.
    """
class ParameterError(Error):
    """Exception denoting invalid input from user.

    Wrong disks given as parameters will be signaled using this
    exception.
    """
117 """Shows program usage information and exits the program."""
119 print >> sys.stderr, "Usage:"
120 print >> sys.stderr, USAGE
125 """Parses the command line options.
127 In case of command line errors, it will show the usage and exit the
131 (options, args), as returned by OptionParser.parse_args
135 parser = optparse.OptionParser(usage="\n%s" % USAGE,
136 version="%%prog (ganeti) %s" %
137 constants.RELEASE_VERSION)
139 parser.add_option("--alldisks", dest="alldisks",
140 help="erase ALL disks", action="store_true",
142 parser.add_option("-d", "--disks", dest="disks",
143 help="Choose disks (e.g. hda,hdg)",
145 parser.add_option("-v", "--verbose",
146 action="store_true", dest="verbose", default=False,
147 help="print command execution messages to stdout")
148 parser.add_option("-r", "--allow-removable",
149 action="store_true", dest="removable_ok", default=False,
150 help="allow and use removable devices too")
151 parser.add_option("-g", "--vg-name", type="string",
152 dest="vgname", default="xenvg", metavar="NAME",
153 help="the volume group to be created [default: xenvg]")
156 options, args = parser.parse_args()
160 verbose_flag = options.verbose
def ExecCommand(command):
    """Executes a command, echoing it and its output in verbose mode.

    This is a thin wrapper around RunCmd (imported from ganeti.utils).
    When the module-level verbose_flag is true (it is set from the
    -v/--verbose option), the command line and the command output are
    printed on stdout.

    Args:
      command: the command line to execute, passed unchanged to RunCmd

    Returns:
      the result object returned by RunCmd, unchanged; callers in this
      file read its failed, stdout, stderr and output attributes
    """
    if verbose_flag:
        # One-element tuple so that a non-string command is formatted too
        sys.stdout.write("%s\n" % (command,))
    result = RunCmd(command)
    if verbose_flag:
        sys.stdout.write("%s\n" % (result.output,))
    return result
188 """Check the prerequisites of this program.
190 It checks that it runs on Linux 2.6, and that /sys is mounted and the
191 fact that /sys/block is a directory.
195 raise PrereqError("This tool runs as root only. Really.")
197 osname, nodename, release, version, arch = os.uname()
198 if osname != 'Linux':
199 raise PrereqError("This tool only runs on Linux"
200 " (detected OS: %s)." % osname)
202 if not release.startswith("2.6."):
203 raise PrereqError("Wrong major kernel version (detected %s, needs"
206 if not os.path.ismount("/sys"):
207 raise PrereqError("Can't find a filesystem mounted at /sys."
208 " Please mount /sys.")
210 if not os.path.isdir("/sys/block"):
211 raise SysconfigError("Can't find /sys/block directory. Has the"
212 " layout of /sys changed?")
214 if not os.path.ismount("/proc"):
215 raise PrereqError("Can't find a filesystem mounted at /proc."
216 " Please mount /proc.")
218 if not os.path.exists("/proc/mounts"):
219 raise SysconfigError("Can't find /proc/mounts")
def CheckVGExists(vgname):
    """Checks to see if a volume group exists.

    Args:
      vgname: the volume group name

    Returns:
      a four-tuple (exists, lv_count, vg_size, vg_free), where:
        exists: True if the volume group exists, otherwise False; if
          False, all other members of the tuple are None
        lv_count: The number of logical volumes in the volume group
        vg_size: The total size of the volume group (in gibibytes)
        vg_free: The available space in the volume group (same unit)
      The last three members are the strings printed by vgs; they are
      not converted to numbers.

    Raises:
      PrereqError: if vgs succeeded but its output is not made of
        exactly three whitespace-separated fields
    """
    result = ExecCommand("vgs --nohead -o lv_count,vg_size,vg_free"
                         " --nosuffix --units g"
                         " --ignorelockingfailure %s" % vgname)
    if result.failed:
        # vgs fails for an unknown volume group: report "does not exist"
        return False, None, None, None

    try:
        lv_count, vg_size, vg_free = result.stdout.strip().split()
    except ValueError:
        # Unpacking fails when the output of vgs can't be parsed
        raise PrereqError("cannot parse output of vgs (%s)" % result.stdout)

    return True, lv_count, vg_size, vg_free
252 def CheckSysDev(name, devnum):
253 """Checks consistency between /sys and /dev trees.
255 In /sys/block/<name>/dev and /sys/block/<name>/<part>/dev are the
256 kernel-known device numbers. The /dev/<name> block/char devices are
257 created by userspace and thus could differ from the kernel
258 view. This function checks the consistency between the device number
259 read from /sys and the actual device number in /dev.
261 Note that since the system could be using udev which removes and
262 recreates the device nodes on partition table rescan, we need to do
263 some retries here. Since we only do a stat, we can afford to do many
267 name: the device name, e.g. 'sda'
268 devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3
271 None; failure of the check is signaled by raising a
272 SysconfigError exception
275 path = "/dev/%s" % name
276 for retries in range(40):
277 if os.path.exists(path):
281 raise SysconfigError("the device file %s does not exist, but the block"
282 " device exists in the /sys/block tree" % path)
283 rdev = os.stat(path).st_rdev
285 raise SysconfigError("For device %s, the major:minor in /dev is %04x"
286 " while the major:minor in sysfs is %s" %
287 (path, rdev, devnum))
def ReadDev(syspath):
    """Reads the device number from a sysfs path.

    The device number is given in sysfs under a block device directory
    in a file named 'dev' which contains major:minor (in ASCII). This
    function reads that file and converts the major:minor pair to a
    device number.

    Args:
      syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

    Returns:
      the device number, as built by os.makedev(major, minor)

    Raises:
      ProgrammingError: if <syspath>/dev does not exist
    """
    devfile = "%s/dev" % syspath
    if not os.path.exists(devfile):
        raise ProgrammingError("Invalid path passed to ReadDev: %s" % syspath)

    f = open(devfile)
    try:
        data = f.read().strip()
    finally:
        # Close the handle even if the read fails
        f.close()

    major, minor = data.split(":", 1)
    return os.makedev(int(major), int(minor))
def ReadSize(syspath):
    """Reads the size from a sysfs path.

    The size is given in sysfs under a block device directory in a file
    named 'size' which contains the number of sectors (in ASCII). This
    function reads that file and converts the number of sectors to a
    size in bytes, counting 512 bytes per sector.

    Args:
      syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

    Returns:
      the device size in bytes

    Raises:
      ProgrammingError: if <syspath>/size does not exist
    """
    sizefile = "%s/size" % syspath
    if not os.path.exists(sizefile):
        raise ProgrammingError("Invalid path passed to ReadSize: %s" % syspath)

    f = open(sizefile)
    try:
        data = f.read().strip()
    finally:
        # Close the handle even if the read fails
        f.close()

    # Plain integer arithmetic: no Python-2-only long literal is needed,
    # the result is promoted automatically when it gets large
    return 512 * int(data)
def ReadPV(name):
    """Reads physical volume information.

    This function tries to see if a block device is a physical volume,
    by running pvdisplay on it.

    Args:
      name: the device name (e.g. sda)

    Returns:
      The name of the volume group to which this PV belongs, or
      "" if this PV is not in use, or
      None if this is not a PV
    """
    result = ExecCommand("pvdisplay -c /dev/%s" % name)
    if result.failed:
        # pvdisplay fails on devices which are not physical volumes
        return None
    # The volume group name is the second colon-separated field
    return result.stdout.strip().split(":")[1]
def GetDiskList(opts):
    """Computes the block device list for this system.

    This function examines the /sys/block tree and, using the
    information therein, computes the status of each block device whose
    name starts with hd, sd or ubd.

    Args:
      opts: the parsed command line options; only removable_ok is
        read, and removable devices are skipped unless it is true

    Returns:
      [(name, size, dev, partitions, inuse), ...], sorted by name, where:
        name is the block device name (e.g. sda)
        size the size in bytes
        dev the device number (e.g. 8704 for hdg)
        partitions is [(name, size, dev), ...] mirroring the disk list
          data, sorted by name
        inuse is a boolean showing the in-use status of the disk,
          computed as the impossibility of re-reading the partition
          table (see CheckReread)
    """
    dlist = []
    for name in os.listdir("/sys/block"):
        if not (name.startswith("hd") or
                name.startswith("sd") or
                name.startswith("ubd")):
            continue

        blockpath = "/sys/block/%s" % name
        size = ReadSize(blockpath)

        f = open("%s/removable" % blockpath)
        try:
            removable = int(f.read().strip())
        finally:
            # Close the handle even if reading or parsing fails
            f.close()

        if removable and not opts.removable_ok:
            continue

        dev = ReadDev(blockpath)
        CheckSysDev(name, dev)
        inuse = not CheckReread(name)

        # Enumerate partitions of the block device: they are the
        # subdirectories whose name starts with the disk name
        partitions = []
        for partname in os.listdir(blockpath):
            if not partname.startswith(name):
                continue
            partpath = "%s/%s" % (blockpath, partname)
            partdev = ReadDev(partpath)
            partsize = ReadSize(partpath)
            CheckSysDev(partname, partdev)
            partitions.append((partname, partsize, partdev))

        # os.listdir gives no ordering guarantee; sort for stable output
        partitions.sort()
        dlist.append((name, size, dev, partitions, inuse))

    dlist.sort()
    return dlist
def GetMountInfo():
    """Reads /proc/mounts and computes the devnum-mountpoint mapping.

    This function reads /proc/mounts, finds the mounted filesystems
    (excepting a hard-coded blacklist of network and virtual
    filesystems) and does a stat on these mountpoints. The st_dev number
    of the results is memorised for later matching against block device
    numbers.

    Returns:
      a {device number: mountpoint} dictionary
    """
    f = open("/proc/mounts", "r")
    try:
        mountlines = f.readlines()
    finally:
        # Close the handle even if the read fails
        f.close()

    mounts = {}
    for line in mountlines:
        device, mountpoint, fstype, rest = line.split(None, 3)
        # Filesystem type blacklist
        if fstype in ["nfs", "nfs4", "autofs", "tmpfs", "proc", "sysfs"]:
            continue
        try:
            dev = os.stat(mountpoint).st_dev
        except OSError:
            # sys.exc_info() is used instead of an "except ... as" or
            # "except ..., err" clause so that this parses on both
            # Python 2 and Python 3
            err = sys.exc_info()[1]
            # this should be a fairly rare error, since we are blacklisting
            # network filesystems; with this in mind, we'll ignore it,
            # since the rereadpt check catches in-use filesystems,
            # and this is used for disk information only
            sys.stderr.write("Can't stat mountpoint '%s': %s\n" %
                             (mountpoint, err))
            sys.stderr.write("Ignoring.\n")
            continue
        mounts[dev] = mountpoint

    return mounts
def DevInfo(name, dev, mountinfo):
    """Computes miscellaneous information about a block device.

    Args:
      name: the device name, e.g. sda
      dev: the device number of the device
      mountinfo: a {device number: mountpoint} dictionary, as returned
        by GetMountInfo

    Returns:
      (mpath, whatvg, fileinfo), where
        mpath is the mount path where this device is mounted or None
        whatvg is the result of the ReadPV function
        fileinfo is the output of file -bs on the device (at most the
          first 45 characters), or "<error: ...>" holding the stderr
          of the command if it failed
    """
    mpath = mountinfo.get(dev)
    whatvg = ReadPV(name)

    result = ExecCommand("file -bs /dev/%s" % name)
    if result.failed:
        fileinfo = "<error: %s>" % result.stderr
    else:
        # Only on success: previously this assignment ran
        # unconditionally and overwrote the error text above
        fileinfo = result.stdout[:45]
    return mpath, whatvg, fileinfo
479 def ShowDiskInfo(opts):
480 """Shows a nicely formatted block device list for this system.
482 This function shows the user a table with the information gathered
483 by the other functions defined, in order to help the user make a
484 choice about which disks should be allocated to our volume group.
487 mounts = GetMountInfo()
488 dlist = GetDiskList(opts)
490 print "------- Disk information -------"
499 fields = ["name", "size", "used", "mount", "lvm", "info"]
502 # Flatten the [(disk, [partition,...]), ...] list
503 for name, size, dev, parts, inuse in dlist:
508 flatlist.append((name, size, dev, str_inuse))
509 for partname, partsize, partdev in parts:
510 flatlist.append((partname, partsize, partdev, ""))
513 for name, size, dev, in_use in flatlist:
514 mp, vgname, fileinfo = DevInfo(name, dev, mounts)
522 lvminfo = "in %s" % vgname
528 strlist.append([name, "%.2f" % (float(size) / 1024 / 1024),
529 in_use, mp, lvminfo, fileinfo])
531 data = cli.GenerateTable(headers, fields, None,
532 strlist, numfields=["size"])
538 def CheckReread(name):
539 """Check to see if a block device is in use.
541 Uses blockdev to reread the partition table of a block device, and
542 thus compute the in-use status. See the discussion in GetDiskList
543 about the meaning of 'in use'.
546 boolean, True if the partition table could be re-read (i.e. the device is not in use), False otherwise
549 for retries in range(3):
550 result = ExecCommand("blockdev --rereadpt /dev/%s" % name)
551 if not result.failed:
555 return not result.failed
559 """Wipes a block device.
561 This function wipes a block device, by clearing and re-reading the
562 partition table. If not successful, it writes back the old partition
563 data, and leaves the cleanup to the user.
566 the device name (e.g. sda)
569 if not CheckReread(name):
570 raise OperationalError("CRITICAL: disk %s you selected seems to be in"
571 " use. ABORTING!" % name)
573 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
574 olddata = os.read(fd, 512)
575 if len(olddata) != 512:
576 raise OperationalError("CRITICAL: Can't read partition table information"
577 " from /dev/%s (needed 512 bytes, got %d" %
578 (name, len(olddata)))
581 bytes_written = os.write(fd, newdata)
583 if bytes_written != 512:
584 raise OperationalError("CRITICAL: Can't write partition table information"
585 " to /dev/%s (tried to write 512 bytes, written"
586 " %d. I don't know how to cleanup. Sorry." %
587 (name, bytes_written))
589 if not CheckReread(name):
590 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
591 os.write(fd, olddata)
593 raise OperationalError("CRITICAL: disk %s which I have just wiped cannot"
594 " reread partition table. Most likely, it is"
595 " in use. You have to clean after this yourself."
596 " I tried to restore the old partition table,"
597 " but I cannot guarantee nothing has broken." %
def PartitionDisk(name):
    """Partitions a disk.

    This function creates a single partition spanning the entire disk,
    by piping the specification ",,8e," (default start, default size,
    partition type 8e) to sfdisk.

    Args:
      name: the device name, e.g. sda

    Raises:
      OperationalError: if the sfdisk command fails
    """
    result = ExecCommand('echo ,,8e, | sfdisk /dev/%s' % name)
    if not result.failed:
        return
    raise OperationalError("CRITICAL: disk %s which I have just partitioned"
                           " cannot reread its partition table, or there"
                           " is some other sfdisk error. Likely, it is in"
                           " use. You have to clean this yourself. Error"
                           " message from sfdisk: %s" %
                           (name, result.output))
def CreatePVOnDisk(name):
    """Creates a physical volume on a block device.

    This function creates a physical volume on the first partition of
    a block device, overriding all warnings (pvcreate -yff). So it can
    wipe existing PVs and PVs which are in a VG.

    Args:
      name: the device name, e.g. sda; the PV is created on /dev/<name>1

    Raises:
      OperationalError: if the pvcreate command fails
    """
    result = ExecCommand("pvcreate -yff /dev/%s1 " % name)
    if not result.failed:
        return
    raise OperationalError("I cannot create a physical volume on"
                           " partition /dev/%s1. Error message: %s."
                           " Please clean up yourself." %
                           (name, result.output))
def CreateVG(vgname, disks):
    """Creates the volume group.

    This function creates a volume group named `vgname` on the first
    partition of each of the disks given as parameters. The physical
    extent size is set to 64MB.

    Args:
      vgname: the name of the volume group to create
      disks: a list of disk names, e.g. ['sda','sdb']

    Raises:
      OperationalError: if the vgcreate command fails
    """
    pnames = ["'/dev/%s1'" % disk for disk in disks]
    result = ExecCommand("vgcreate -s 64MB '%s' %s" %
                         (vgname, " ".join(pnames)))
    if not result.failed:
        return
    raise OperationalError("I cannot create the volume group %s from"
                           " disks %s. Error message: %s. Please clean up"
                           " yourself." %
                           (vgname, " ".join(disks), result.output))
def ValidateDiskList(options):
    """Validates or computes the disk list for create.

    This function either computes the available disk list (if the user
    gave --alldisks option), or validates the user-given disk list (by
    using the --disks option) such that all given disks are present,
    not in use and listed only once.

    Args:
      options: the options object returned by OptionParser.parse_args;
        its alldisks and disks attributes are read here, and it is
        passed on to GetDiskList

    Returns:
      a list of disk names, e.g. ['sda', 'sdb']

    Raises:
      PrereqError: if the system has no candidate disks, or none free
      ParameterError: if a requested disk is in use, unknown or given
        more than once, or if neither --alldisks nor --disks was used
    """
    sysdisks = GetDiskList(options)
    if not sysdisks:
        raise PrereqError("no disks found (I looked for"
                          " non-removable block devices).")

    sysd_free = []
    sysd_used = []
    for name, size, dev, part, used in sysdisks:
        if used:
            sysd_used.append(name)
        else:
            sysd_free.append(name)

    if not sysd_free:
        raise PrereqError("no free disks found! (%d in-use disks)" %
                          len(sysd_used))

    if options.alldisks:
        return sysd_free

    if not options.disks:
        raise ParameterError("Please use either --alldisks or --disks!")

    disklist = options.disks.split(",")
    seen = set()
    for name in disklist:
        if name in sysd_used:
            raise ParameterError("disk %s is in use, cannot wipe!" % name)
        if name not in sysd_free:
            raise ParameterError("cannot find disk %s!" % name)
        if name in seen:
            # A repeated disk would be wiped and handed to vgcreate twice
            raise ParameterError("disk %s given more than once!" % name)
        seen.add(name)
    return disklist
704 """Actual main routine."""
708 options, args = ParseOptions()
709 vgname = options.vgname
710 command = args.pop(0)
711 if command == "diskinfo":
712 ShowDiskInfo(options)
714 if command != "create":
717 exists, lv_count, vg_size, vg_free = CheckVGExists(vgname)
719 raise PrereqError("It seems volume group '%s' already exists:\n"
720 " LV count: %s, size: %s, free: %s." %
721 (vgname, lv_count, vg_size, vg_free))
724 disklist = ValidateDiskList(options)
726 for disk in disklist:
729 for disk in disklist:
731 CreateVG(vgname, disklist)
733 status, lv_count, size, free = CheckVGExists(vgname)
735 print "Done! %s: size %s GiB, disks: %s" % (vgname, size,
738 raise OperationalError("Although everything seemed ok, the volume"
739 " group did not get created.")
743 """application entry point.
745 This is just a wrapper over BootStrap, to handle our own exceptions.
750 except PrereqError, err:
751 print >> sys.stderr, "The prerequisites for running this tool are not met."
752 print >> sys.stderr, ("Please make sure you followed all the steps in"
753 " the build document.")
754 print >> sys.stderr, "Description: %s" % str(err)
756 except SysconfigError, err:
757 print >> sys.stderr, ("This system's configuration seems wrong, at"
758 " least is not what I expect.")
759 print >> sys.stderr, ("Please check that the installation didn't fail"
761 print >> sys.stderr, "Description: %s" % str(err)
763 except ParameterError, err:
764 print >> sys.stderr, ("Some parameters you gave to the program or the"
765 " invocation is wrong. ")
766 print >> sys.stderr, "Description: %s" % str(err)
768 except OperationalError, err:
769 print >> sys.stderr, ("A serious error has happened while modifying"
770 " the system's configuration.")
771 print >> sys.stderr, ("Please review the error message below and make"
772 " sure you clean up yourself.")
773 print >> sys.stderr, ("It is most likely that the system configuration"
774 " has been partially altered.")
775 print >> sys.stderr, str(err)
777 except ProgrammingError, err:
778 print >> sys.stderr, ("Internal application error. Please signal this"
779 " to xencluster-team.")
780 print >> sys.stderr, "Error description: %s" % str(err)
783 print >> sys.stderr, "Unhandled application error: %s" % err
785 except (IOError, OSError), err:
786 print >> sys.stderr, "I/O error detected, please report."
787 print >> sys.stderr, "Description: %s" % str(err)
791 if __name__ == "__main__":