4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Program which configures LVM on the Ganeti nodes.
24 This program wipes disks and creates a volume group on top of them. It
25 can also show disk information to help you decide which disks you want
28 The error handling is done by raising our own exceptions from most of
the functions; these exceptions are then handled globally in the main()
30 function. The exceptions that each function can raise are not
31 documented individually, since almost every error path ends in a
34 Another two exceptions that are handled globally are IOError and
35 OSError. The idea behind this is, since we run as root, we should
36 usually not get these errors, but if we do it's most probably a system
37 error, so they should be handled and the user instructed to report
47 from ganeti.utils import RunCmd, ReadFile
48 from ganeti import constants
49 from ganeti import cli
51 USAGE = ("\tlvmstrap diskinfo\n"
52 "\tlvmstrap [--vgname=NAME] [--allow-removable]"
53 " { --alldisks | --disks DISKLIST }"
class Error(Exception):
    """Common base class for all exceptions raised by this program."""


class ProgrammingError(Error):
    """Signals that an assumption made by the code does not hold.

    Typical causes are changes in the sysfs tree, or otherwise
    incorrect assumptions about the contents of the /sys/block/...
    directories.

    """


class SysconfigError(Error):
    """Signals an invalid system configuration.

    Raised when the system is set up in an unexpected way, e.g. /dev
    files are missing or their major/minor numbers do not match the
    ones of the /sys/block devices. This usually points at a problem
    with the installation of the Xen node.

    """


class PrereqError(Error):
    """Signals that the prerequisites of this program are not met.

    Raised when the node does not fulfil the requirements for cluster
    membership: wrong kernel version, no free disks, and the like.
    This usually means that the build steps for the Xen node were not
    followed correctly.

    """


class OperationalError(Error):
    """Signals an actual failure while bootstrapping.

    Errors that happen during the bootstrapping itself are reported
    through this exception.

    """


class ParameterError(Error):
    """Signals invalid input from the user.

    Wrong disks given as parameters are reported through this
    exception.

    """
117 """Shows program usage information and exits the program."""
119 print >> sys.stderr, "Usage:"
120 print >> sys.stderr, USAGE
125 """Parses the command line options.
127 In case of command line errors, it will show the usage and exit the
131 (options, args), as returned by OptionParser.parse_args
133 global verbose_flag # pylint: disable-msg=W0603
135 parser = optparse.OptionParser(usage="\n%s" % USAGE,
136 version="%%prog (ganeti) %s" %
137 constants.RELEASE_VERSION)
139 parser.add_option("--alldisks", dest="alldisks",
140 help="erase ALL disks", action="store_true",
142 parser.add_option("-d", "--disks", dest="disks",
143 help="Choose disks (e.g. hda,hdg)",
145 parser.add_option(cli.VERBOSE_OPT)
146 parser.add_option("-r", "--allow-removable",
147 action="store_true", dest="removable_ok", default=False,
148 help="allow and use removable devices too")
149 parser.add_option("-g", "--vg-name", type="string",
150 dest="vgname", default="xenvg", metavar="NAME",
151 help="the volume group to be created [default: xenvg]")
154 options, args = parser.parse_args()
158 verbose_flag = options.verbose
def ExecCommand(command):
    """Runs a command through RunCmd, echoing it in verbose mode.

    If the command line argument -v has been given (module-level
    verbose_flag), the command line is printed on stdout before the
    command is run, and the command output is printed afterwards.

    Args:
      command: the command line to execute

    Returns:
      the result object built by RunCmd; the callers in this file read
      its failed, stdout, stderr and output attributes

    """
    echo = verbose_flag
    if echo:
        # single-argument print(...) behaves the same as a print statement
        print(command)
    outcome = RunCmd(command)
    if echo:
        print(outcome.output)
    return outcome
186 """Check the prerequisites of this program.
  It checks that it runs on Linux 2.6, and that /sys is mounted and the
189 fact that /sys/block is a directory.
193 raise PrereqError("This tool runs as root only. Really.")
195 osname, _, release, _, _ = os.uname()
196 if osname != 'Linux':
197 raise PrereqError("This tool only runs on Linux"
198 " (detected OS: %s)." % osname)
200 if not release.startswith("2.6."):
201 raise PrereqError("Wrong major kernel version (detected %s, needs"
204 if not os.path.ismount("/sys"):
205 raise PrereqError("Can't find a filesystem mounted at /sys."
206 " Please mount /sys.")
208 if not os.path.isdir("/sys/block"):
209 raise SysconfigError("Can't find /sys/block directory. Has the"
210 " layout of /sys changed?")
212 if not os.path.ismount("/proc"):
213 raise PrereqError("Can't find a filesystem mounted at /proc."
214 " Please mount /proc.")
216 if not os.path.exists("/proc/mounts"):
217 raise SysconfigError("Can't find /proc/mounts")
def CheckVGExists(vgname):
    """Asks LVM whether a volume group exists and how large it is.

    Args:
      vgname: the volume group name

    Returns:
      a four-tuple (exists, lv_count, vg_size, vg_free), where:
        exists: True if the vgs command succeeded for this volume
          group, otherwise False; if False, all other members of the
          tuple are None
        lv_count: the number of logical volumes in the volume group
        vg_size: the total size of the volume group (in gibibytes)
        vg_free: the available space in the volume group
      The three figures are the whitespace-separated fields printed by
      vgs and are returned as strings.

    Raises:
      PrereqError: if vgs succeeded but did not print exactly three
        fields

    """
    query = ("vgs --nohead -o lv_count,vg_size,vg_free"
             " --nosuffix --units g"
             " --ignorelockingfailure %s" % vgname)
    result = ExecCommand(query)
    if result.failed:
        return False, None, None, None

    columns = result.stdout.strip().split()
    if len(columns) != 3:
        # This means the output of vgs can't be parsed
        raise PrereqError("cannot parse output of vgs (%s)" % result.stdout)
    lv_count, vg_size, vg_free = columns
    return True, lv_count, vg_size, vg_free
250 def CheckSysDev(name, devnum):
251 """Checks consistency between /sys and /dev trees.
253 In /sys/block/<name>/dev and /sys/block/<name>/<part>/dev are the
254 kernel-known device numbers. The /dev/<name> block/char devices are
255 created by userspace and thus could differ from the kernel
256 view. This function checks the consistency between the device number
257 read from /sys and the actual device number in /dev.
259 Note that since the system could be using udev which removes and
260 recreates the device nodes on partition table rescan, we need to do
261 some retries here. Since we only do a stat, we can afford to do many
265 name: the device name, e.g. 'sda'
266 devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3
269 None; failure of the check is signaled by raising a
270 SysconfigError exception
273 path = "/dev/%s" % name
275 if os.path.exists(path):
279 raise SysconfigError("the device file %s does not exist, but the block"
280 " device exists in the /sys/block tree" % path)
281 rdev = os.stat(path).st_rdev
283 raise SysconfigError("For device %s, the major:minor in /dev is %04x"
284 " while the major:minor in sysfs is %s" %
285 (path, rdev, devnum))
def ReadDev(syspath):
    """Reads the device number from a sysfs path.

    The device number is given in sysfs under a block device directory
    in a file named 'dev' which contains major:minor (in ASCII). This
    function reads that file and converts the major:minor pair to a
    device number.

    Args:
      syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

    Returns:
      the device number, as computed by os.makedev

    Raises:
      ProgrammingError: if <syspath>/dev does not exist

    """
    devfile = "%s/dev" % syspath
    if not os.path.exists(devfile):
        raise ProgrammingError("Invalid path passed to ReadDev: %s" % syspath)
    handle = open(devfile)
    try:
        data = handle.read().strip()
    finally:
        # release the descriptor even when the read itself fails
        handle.close()
    major, minor = data.split(":", 1)
    # os.makedev needs integers, the file holds them as ASCII digits
    return os.makedev(int(major), int(minor))
def ReadSize(syspath):
    """Reads the size from a sysfs path.

    The size is given in sysfs under a block device directory in a file
    named 'size' which contains the number of sectors (in ASCII). This
    function reads that file and converts the number of sectors to a
    size in bytes, counting 512 bytes per sector.

    Args:
      syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

    Returns:
      the device size in bytes

    Raises:
      ProgrammingError: if <syspath>/size does not exist

    """
    sizefile = "%s/size" % syspath
    if not os.path.exists(sizefile):
        raise ProgrammingError("Invalid path passed to ReadSize: %s" % syspath)
    handle = open(sizefile)
    try:
        data = handle.read().strip()
    finally:
        # release the descriptor even when the read itself fails
        handle.close()
    # plain integers are promoted to long automatically when needed, so
    # no explicit long literal is required here
    return 512 * int(data)
def ReadPV(name):
    """Reads physical volume information.

    This function asks pvdisplay whether a block device is a physical
    volume and, if so, which volume group it belongs to.

    Args:
      name: the device name (e.g. sda)

    Returns:
      The name of the volume group to which this PV belongs, or
      "" if this PV is not in use, or
      None if this is not a PV (i.e. the pvdisplay command failed)

    """
    result = ExecCommand("pvdisplay -c /dev/%s" % name)
    if result.failed:
        return None
    # the volume group name is the second colon-separated field
    fields = result.stdout.strip().split(":")
    return fields[1]
def GetDiskList(opts):
    """Computes the block device list for this system.

    This function examines the /sys/block tree and, using the
    information found there, computes the status of each hd*, sd* and
    ubd* block device. Removable devices are skipped unless
    opts.removable_ok is set.

    Args:
      opts: the parsed command line options; only removable_ok is read

    Returns:
      [(name, size, dev, partitions, inuse), ...], sorted by name, where
        name is the block device name (e.g. sda)
        size the size in bytes
        dev the device number (e.g. 8704 for hdg)
        partitions is [(name, size, dev), ...] mirroring the disk list
          data, also sorted by name
        inuse is a boolean showing the in-use status of the disk,
          computed as the impossibility of re-reading the partition
          table (the meaning of the operation varies with the kernel
          version, but is usually accurate)

    """
    disks = []
    for name in os.listdir("/sys/block"):
        if not (name.startswith("hd") or
                name.startswith("sd") or
                name.startswith("ubd")):
            continue

        syspath = "/sys/block/%s" % name
        size = ReadSize(syspath)

        flagfile = open("%s/removable" % syspath)
        try:
            removable = int(flagfile.read().strip())
        finally:
            # do not leak the descriptor if the flag cannot be read/parsed
            flagfile.close()

        if removable and not opts.removable_ok:
            continue

        dev = ReadDev(syspath)
        CheckSysDev(name, dev)
        inuse = not CheckReread(name)

        # Enumerate partitions of the block device: they show up as
        # sub-directories whose name starts with the disk name
        partitions = []
        for partname in os.listdir(syspath):
            if not partname.startswith(name):
                continue
            partpath = "%s/%s" % (syspath, partname)
            partdev = ReadDev(partpath)
            partsize = ReadSize(partpath)
            CheckSysDev(partname, partdev)
            partitions.append((partname, partsize, partdev))

        # os.listdir gives no ordering guarantee; sort for stable output
        partitions.sort()
        disks.append((name, size, dev, partitions, inuse))

    disks.sort()
    return disks
def GetMountInfo():
    """Reads /proc/mounts and computes the device number-mountpoint mapping.

    This function reads /proc/mounts, finds the mounted filesystems
    (excepting a hard-coded blacklist of network and virtual
    filesystems) and does a stat on these mountpoints. The st_dev
    number of each result is memorised for later matching against the
    device numbers of the block devices.

    Returns:
      a dictionary keyed by device number (st_dev), whose values are
      the mountpoints

    """
    # fs type blacklist
    skipped_fstypes = ["nfs", "nfs4", "autofs", "tmpfs", "proc", "sysfs"]

    mounts = {}
    for entry in ReadFile("/proc/mounts").splitlines():
        _, mountpoint, fstype, _ = entry.split(None, 3)
        if fstype in skipped_fstypes:
            continue
        try:
            dev = os.stat(mountpoint).st_dev
        except OSError:
            # fetched this way so the clause parses on any Python version
            err = sys.exc_info()[1]
            # this should be a fairly rare error, since we are blacklisting
            # network filesystems; with this in mind, we'll ignore it,
            # since the rereadpt check catches in-use filesystems,
            # and this is used for disk information only
            warning = "Can't stat mountpoint '%s': %s" % (mountpoint, err)
            for text in (warning, "Ignoring."):
                sys.stderr.write(text + "\n")
            continue
        mounts[dev] = mountpoint
    return mounts
def DevInfo(name, dev, mountinfo):
    """Computes miscellaneous information about a block device.

    Args:
      name: the device name, e.g. sda
      dev: the device number of the device
      mountinfo: a dictionary mapping device numbers to mountpoints

    Returns:
      (mpath, whatvg, fileinfo), where
        mpath is the mount path where this device is mounted or None
        whatvg is the result of the ReadPV function
        fileinfo is the first 45 characters of the output of file -bs
          on the device, or "<error: ...>" carrying the stderr of the
          command if it failed

    """
    mpath = mountinfo.get(dev)
    whatvg = ReadPV(name)

    result = ExecCommand("file -bs /dev/%s" % name)
    if result.failed:
        fileinfo = "<error: %s>" % result.stderr
    else:
        # only take stdout on success, so that the error text above is
        # not overwritten when the command fails
        fileinfo = result.stdout[:45]
    return mpath, whatvg, fileinfo
475 def ShowDiskInfo(opts):
476 """Shows a nicely formatted block device list for this system.
478 This function shows the user a table with the information gathered
479 by the other functions defined, in order to help the user make a
480 choice about which disks should be allocated to our volume group.
483 mounts = GetMountInfo()
484 dlist = GetDiskList(opts)
486 print "------- Disk information -------"
495 fields = ["name", "size", "used", "mount", "lvm", "info"]
498 # Flatten the [(disk, [partition,...]), ...] list
499 for name, size, dev, parts, inuse in dlist:
504 flatlist.append((name, size, dev, str_inuse))
505 for partname, partsize, partdev in parts:
506 flatlist.append((partname, partsize, partdev, ""))
509 for name, size, dev, in_use in flatlist:
510 mp, vgname, fileinfo = DevInfo(name, dev, mounts)
518 lvminfo = "in %s" % vgname
524 strlist.append([name, "%.2f" % (float(size) / 1024 / 1024),
525 in_use, mp, lvminfo, fileinfo])
527 data = cli.GenerateTable(headers, fields, None,
528 strlist, numfields=["size"])
534 def CheckReread(name):
535 """Check to see if a block device is in use.
537 Uses blockdev to reread the partition table of a block device, and
538 thus compute the in-use status. See the discussion in GetDiskList
539 about the meaning of 'in use'.
    boolean, True if the partition table could be re-read (i.e. the
    device is not in use), False if the device seems to be in use
546 result = ExecCommand("blockdev --rereadpt /dev/%s" % name)
547 if not result.failed:
551 return not result.failed
555 """Wipes a block device.
557 This function wipes a block device, by clearing and re-reading the
558 partition table. If not successful, it writes back the old partition
559 data, and leaves the cleanup to the user.
562 the device name (e.g. sda)
565 if not CheckReread(name):
566 raise OperationalError("CRITICAL: disk %s you selected seems to be in"
567 " use. ABORTING!" % name)
569 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
570 olddata = os.read(fd, 512)
571 if len(olddata) != 512:
572 raise OperationalError("CRITICAL: Can't read partition table information"
573 " from /dev/%s (needed 512 bytes, got %d" %
574 (name, len(olddata)))
577 bytes_written = os.write(fd, newdata)
579 if bytes_written != 512:
580 raise OperationalError("CRITICAL: Can't write partition table information"
581 " to /dev/%s (tried to write 512 bytes, written"
582 " %d. I don't know how to cleanup. Sorry." %
583 (name, bytes_written))
585 if not CheckReread(name):
586 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
587 os.write(fd, olddata)
589 raise OperationalError("CRITICAL: disk %s which I have just wiped cannot"
590 " reread partition table. Most likely, it is"
591 " in use. You have to clean after this yourself."
592 " I tried to restore the old partition table,"
593 " but I cannot guarantee nothing has broken." %
def PartitionDisk(name):
    """Partitions a disk.

    This function feeds sfdisk a single partition definition which
    spans the entire disk and has type 8e (the Linux LVM partition id).

    Args:
      name: the device name, e.g. sda

    Raises:
      OperationalError: if the sfdisk command fails

    """
    sfdisk_cmd = 'echo ,,8e, | sfdisk /dev/%s' % name
    outcome = ExecCommand(sfdisk_cmd)
    if not outcome.failed:
        return
    raise OperationalError("CRITICAL: disk %s which I have just partitioned"
                           " cannot reread its partition table, or there"
                           " is some other sfdisk error. Likely, it is in"
                           " use. You have to clean this yourself. Error"
                           " message from sfdisk: %s" %
                           (name, outcome.output))
def CreatePVOnDisk(name):
    """Creates a physical volume on the first partition of a disk.

    The physical volume is created with pvcreate -yff, overriding all
    warnings, so it can wipe existing PVs and PVs which are in a VG.

    Args:
      name: the device name, e.g. sda; the PV goes on /dev/<name>1

    Raises:
      OperationalError: if the pvcreate command fails

    """
    outcome = ExecCommand("pvcreate -yff /dev/%s1 " % name)
    if not outcome.failed:
        return
    raise OperationalError("I cannot create a physical volume on"
                           " partition /dev/%s1. Error message: %s."
                           " Please clean up yourself." %
                           (name, outcome.output))
635 def CreateVG(vgname, disks):
636 """Creates the volume group.
638 This function creates a volume group named `vgname` on the disks
639 given as parameters. The physical extent size is set to 64MB.
642 disks: a list of disk names, e.g. ['sda','sdb']
645 pnames = ["'/dev/%s1'" % disk for disk in disks]
646 result = ExecCommand("vgcreate -s 64MB '%s' %s" % (vgname, " ".join(pnames)))
648 raise OperationalError("I cannot create the volume group %s from"
649 " disks %s. Error message: %s. Please clean up"
651 (vgname, " ".join(disks), result.output))
def ValidateDiskList(options):
    """Validates or computes the disk list for create.

    This function either computes the available disk list (if the user
    gave the --alldisks option), or validates the user-given disk list
    (from the --disks option) such that all given disks are present on
    the system and are not in use.

    Args:
      options: the parsed command line options; alldisks and disks are
        read here, and the object is passed on to GetDiskList

    Returns:
      a list of disk names, e.g. ['sda', 'sdb']

    Raises:
      PrereqError: if the system has no disks, or no free disks
      ParameterError: if a requested disk is in use or unknown, or if
        neither --alldisks nor --disks was given

    """
    sysdisks = GetDiskList(options)
    if not sysdisks:
        raise PrereqError("no disks found (I looked for"
                          " non-removable block devices).")

    # split the disk names by their in-use flag (last tuple member)
    sysd_used = [name for name, _, _, _, used in sysdisks if used]
    sysd_free = [name for name, _, _, _, used in sysdisks if not used]

    if not sysd_free:
        raise PrereqError("no free disks found! (%d in-use disks)" %
                          len(sysd_used))

    if options.alldisks:
        return sysd_free

    if not options.disks:
        raise ParameterError("Please use either --alldisks or --disks!")

    disklist = options.disks.split(",")
    for name in disklist:
        if name in sysd_used:
            raise ParameterError("disk %s is in use, cannot wipe!" % name)
        if name not in sysd_free:
            raise ParameterError("cannot find disk %s!" % name)
    return disklist
700 """Actual main routine."""
704 options, args = ParseOptions()
705 vgname = options.vgname
706 command = args.pop(0)
707 if command == "diskinfo":
708 ShowDiskInfo(options)
710 if command != "create":
713 exists, lv_count, vg_size, vg_free = CheckVGExists(vgname)
715 raise PrereqError("It seems volume group '%s' already exists:\n"
716 " LV count: %s, size: %s, free: %s." %
717 (vgname, lv_count, vg_size, vg_free))
720 disklist = ValidateDiskList(options)
722 for disk in disklist:
725 for disk in disklist:
727 CreateVG(vgname, disklist)
729 status, lv_count, size, _ = CheckVGExists(vgname)
731 print "Done! %s: size %s GiB, disks: %s" % (vgname, size,
734 raise OperationalError("Although everything seemed ok, the volume"
735 " group did not get created.")
739 """application entry point.
741 This is just a wrapper over BootStrap, to handle our own exceptions.
746 except PrereqError, err:
747 print >> sys.stderr, "The prerequisites for running this tool are not met."
748 print >> sys.stderr, ("Please make sure you followed all the steps in"
749 " the build document.")
750 print >> sys.stderr, "Description: %s" % str(err)
752 except SysconfigError, err:
753 print >> sys.stderr, ("This system's configuration seems wrong, at"
754 " least is not what I expect.")
755 print >> sys.stderr, ("Please check that the installation didn't fail"
757 print >> sys.stderr, "Description: %s" % str(err)
759 except ParameterError, err:
760 print >> sys.stderr, ("Some parameters you gave to the program or the"
761 " invocation is wrong. ")
762 print >> sys.stderr, "Description: %s" % str(err)
764 except OperationalError, err:
765 print >> sys.stderr, ("A serious error has happened while modifying"
766 " the system's configuration.")
767 print >> sys.stderr, ("Please review the error message below and make"
768 " sure you clean up yourself.")
769 print >> sys.stderr, ("It is most likely that the system configuration"
770 " has been partially altered.")
771 print >> sys.stderr, str(err)
773 except ProgrammingError, err:
774 print >> sys.stderr, ("Internal application error. Please signal this"
775 " to xencluster-team.")
776 print >> sys.stderr, "Error description: %s" % str(err)
779 print >> sys.stderr, "Unhandled application error: %s" % err
781 except (IOError, OSError), err:
782 print >> sys.stderr, "I/O error detected, please report."
783 print >> sys.stderr, "Description: %s" % str(err)
787 if __name__ == "__main__":