4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Program which configures LVM on the Ganeti nodes.
24 This program wipes disks and creates a volume group on top of them. It
25 can also show disk information to help you decide which disks you want
28 The error handling is done by raising our own exceptions from most of
29 the functions; these exceptions are then handled globally in the main()
30 function. The exceptions that each function can raise are not
31 documented individually, since almost every error path ends in a
34 Another two exceptions that are handled globally are IOError and
35 OSError. The idea behind this is, since we run as root, we should
36 usually not get these errors, but if we do it's most probably a system
37 error, so they should be handled and the user instructed to report
47 from ganeti.utils import RunCmd, ReadFile
48 from ganeti import constants
# Usage text shown by the option parser.  The option spelling must match
# the parser definition, which registers the volume group option as
# "-g"/"--vg-name"; "create" is the only sub-command besides "diskinfo".
USAGE = ("\tlvmstrap diskinfo\n"
         "\tlvmstrap [--vg-name=NAME] [--allow-removable]"
         " { --alldisks | --disks DISKLIST }"
         " create")
class Error(Exception):
    """Base class for the exceptions defined in this program."""
class ProgrammingError(Error):
    """Exception denoting invalid assumptions in programming.

    This should catch sysfs tree changes, or otherwise incorrect
    assumptions about the contents of the /sys/block/... directories.

    """
class SysconfigError(Error):
    """Exception denoting invalid system configuration.

    Raised when the system configuration is somehow wrong, e.g. /dev
    files are missing or have major/minor numbers that do not match the
    corresponding /sys/block devices.

    This usually means that the installation of the node did not
    complete correctly.

    """
class PrereqError(Error):
    """Exception denoting invalid prerequisites.

    Raised when the node does not meet the requirements for cluster
    membership: wrong kernel version, no free disks, and similar.

    This usually means that the build steps for the node were not
    followed correctly.

    """
class OperationalError(Error):
    """Exception denoting actual errors.

    Errors that happen while the system is being modified (wiping,
    partitioning, PV/VG creation) are signaled using this exception.

    """
class ParameterError(Error):
    """Exception denoting invalid input from the user.

    Wrong disks given as parameters, or a missing disk selection, are
    signaled using this exception.

    """
116 """Shows program usage information and exits the program."""
118 print >> sys.stderr, "Usage:"
119 print >> sys.stderr, USAGE
124 """Parses the command line options.
126 In case of command line errors, it will show the usage and exit the
130 (options, args), as returned by OptionParser.parse_args
134 parser = optparse.OptionParser(usage="\n%s" % USAGE,
135 version="%%prog (ganeti) %s" %
136 constants.RELEASE_VERSION)
138 parser.add_option("--alldisks", dest="alldisks",
139 help="erase ALL disks", action="store_true",
141 parser.add_option("-d", "--disks", dest="disks",
142 help="Choose disks (e.g. hda,hdg)",
144 parser.add_option("-v", "--verbose",
145 action="store_true", dest="verbose", default=False,
146 help="print command execution messages to stdout")
147 parser.add_option("-r", "--allow-removable",
148 action="store_true", dest="removable_ok", default=False,
149 help="allow and use removable devices too")
150 parser.add_option("-g", "--vg-name", type="string",
151 dest="vgname", default="xenvg", metavar="NAME",
152 help="the volume group to be created [default: xenvg]")
155 options, args = parser.parse_args()
159 verbose_flag = options.verbose
def ExecCommand(command):
    """Run a command through RunCmd, echoing it when verbose mode is on.

    When the module-level verbose_flag is true (it is assigned from the
    -v/--verbose command line option), the command line is printed on
    stdout before it runs and the command output is printed afterwards.

    Args:
      command: the command line to execute, passed unchanged to RunCmd

    Returns:
      the result object produced by RunCmd; callers in this file read
      its failed, stdout, stderr and output attributes

    """
    # The flag is only assigned during option parsing, so fall back to
    # quiet mode when it has not been set yet.
    verbose = globals().get("verbose_flag", False)
    if verbose:
        print(command)
    result = RunCmd(command)
    if verbose:
        print(result.output)
    return result
187 """Check the prerequisites of this program.
189 It checks that it runs on Linux 2.6, that /sys is mounted and
190 that /sys/block is a directory.
194 raise PrereqError("This tool runs as root only. Really.")
196 osname, nodename, release, version, arch = os.uname()
197 if osname != 'Linux':
198 raise PrereqError("This tool only runs on Linux"
199 " (detected OS: %s)." % osname)
201 if not release.startswith("2.6."):
202 raise PrereqError("Wrong major kernel version (detected %s, needs"
205 if not os.path.ismount("/sys"):
206 raise PrereqError("Can't find a filesystem mounted at /sys."
207 " Please mount /sys.")
209 if not os.path.isdir("/sys/block"):
210 raise SysconfigError("Can't find /sys/block directory. Has the"
211 " layout of /sys changed?")
213 if not os.path.ismount("/proc"):
214 raise PrereqError("Can't find a filesystem mounted at /proc."
215 " Please mount /proc.")
217 if not os.path.exists("/proc/mounts"):
218 raise SysconfigError("Can't find /proc/mounts")
def CheckVGExists(vgname):
    """Checks to see if a volume group exists.

    Args:
      vgname: the volume group name

    Returns:
      a four-tuple (exists, lv_count, vg_size, vg_free), where:
        exists: True if the volume group exists, otherwise False; if
          False, all other members of the tuple are None
        lv_count: the number of logical volumes in the volume group
        vg_size: the total size of the volume group (in gibibytes)
        vg_free: the available space in the volume group
      The three values are returned as the strings printed by vgs.

    Raises:
      PrereqError: if vgs succeeded but its output does not consist of
        exactly three whitespace-separated fields

    """
    result = ExecCommand("vgs --nohead -o lv_count,vg_size,vg_free"
                         " --nosuffix --units g"
                         " --ignorelockingfailure %s" % vgname)
    if result.failed:
        # vgs fails for an unknown volume group: report "does not exist"
        return False, None, None, None

    try:
        lv_count, vg_size, vg_free = result.stdout.strip().split()
    except ValueError:
        # Wrong number of fields: the output of vgs can't be parsed
        raise PrereqError("cannot parse output of vgs (%s)" % result.stdout)

    return True, lv_count, vg_size, vg_free
def CheckSysDev(name, devnum):
    """Checks consistency between /sys and /dev trees.

    In /sys/block/<name>/dev and /sys/block/<name>/<part>/dev are the
    kernel-known device numbers. The /dev/<name> block/char devices are
    created by userspace and thus could differ from the kernel
    view. This function checks the consistency between the device number
    read from /sys and the actual device number in /dev.

    Note that since the system could be using udev which removes and
    recreates the device nodes on partition table rescan, the existence
    check is retried a number of times with a short pause in between.
    Since only a stat is done, many retries are affordable.

    Args:
      name: the device name, e.g. 'sda'
      devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3

    Returns:
      None; failure of the check is signaled by raising a
        SysconfigError exception

    """
    import time  # local import: only the retry loop needs it

    path = "/dev/%s" % name
    for _ in range(40):
        if os.path.exists(path):
            break
        # give userspace time to recreate the device node
        time.sleep(0.25)
    else:
        # the loop ran out of retries without ever seeing the node
        raise SysconfigError("the device file %s does not exist, but the block"
                             " device exists in the /sys/block tree" % path)

    rdev = os.stat(path).st_rdev
    if devnum != rdev:
        # both numbers are shown in hex so they can be compared directly
        raise SysconfigError("For device %s, the major:minor in /dev is %04x"
                             " while the major:minor in sysfs is %04x" %
                             (path, rdev, devnum))
def ReadDev(syspath):
    """Reads the device number from a sysfs path.

    The device number is given in sysfs under a block device directory
    in a file named 'dev' which contains major:minor (in ASCII). This
    function reads that file and converts the major:minor pair to a
    single device number.

    Args:
      syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

    Returns:
      the device number, as built by os.makedev(major, minor)

    Raises:
      ProgrammingError: if <syspath>/dev does not exist or its contents
        are not of the form major:minor

    """
    devfile = "%s/dev" % syspath
    if not os.path.exists(devfile):
        raise ProgrammingError("Invalid path passed to ReadDev: %s" % syspath)

    f = open(devfile)
    try:
        data = f.read().strip()
    finally:
        # always release the handle, even if the read fails
        f.close()

    try:
        major, minor = [int(part) for part in data.split(":", 1)]
    except ValueError:
        # missing colon or non-numeric fields: sysfs layout is not what
        # this program assumes
        raise ProgrammingError("Cannot parse device number '%s' read from %s" %
                               (data, devfile))

    # os.makedev needs integers, not the raw strings read from sysfs
    return os.makedev(major, minor)
def ReadSize(syspath):
    """Reads the size from a sysfs path.

    The size is given in sysfs under a block device directory in a file
    named 'size' which contains the number of sectors (in ASCII). This
    function reads that file and converts the number of sectors to a
    size in bytes, counting 512 bytes per sector.

    Args:
      syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

    Returns:
      the device size in bytes

    Raises:
      ProgrammingError: if <syspath>/size does not exist or does not
        contain an integer

    """
    sizefile = "%s/size" % syspath
    if not os.path.exists(sizefile):
        raise ProgrammingError("Invalid path passed to ReadSize: %s" % syspath)

    f = open(sizefile)
    try:
        data = f.read().strip()
    finally:
        # always release the handle, even if the read fails
        f.close()

    try:
        sectors = int(data)
    except ValueError:
        raise ProgrammingError("Cannot parse size '%s' read from %s" %
                               (data, sizefile))

    # Python integers grow as needed, so no explicit long literal is
    # required for large disks.
    return 512 * sectors
341 """Reads physical volume information.
343 This function tries to see if a block device is a physical volume.
346 dev: the device name (e.g. sda)
348 The name of the volume group to which this PV belongs, or
349 "" if this PV is not in use, or
350 None if this is not a PV
353 result = ExecCommand("pvdisplay -c /dev/%s" % name)
356 vgname = result.stdout.strip().split(":")[1]
def GetDiskList(opts):
    """Computes the block device list for this system.

    This function examines the /sys/block tree and, using the
    information therein, computes the status of each block device.
    Only devices whose name starts with hd, sd or ubd are considered,
    and removable devices are skipped unless opts.removable_ok is set.

    Args:
      opts: the parsed command line options; only removable_ok is read

    Returns:
      [(name, size, dev, partitions, inuse), ...], sorted by name, where
        name is the block device name (e.g. sda)
        size the size in bytes
        dev the device number (e.g. 8704 for hdg)
        partitions is [(name, size, dev), ...] mirroring the disk list
          data, sorted by name
        inuse is a boolean showing the in-use status of the disk,
          computed as the impossibility of re-reading the partition
          table (the meaning of the operation varies with the kernel
          version, but is usually accurate; a mounted disk/partition or
          swap-area or PV with active LVs on it is busy)

    """
    dlist = []
    for name in os.listdir("/sys/block"):
        if not (name.startswith("hd") or
                name.startswith("sd") or
                name.startswith("ubd")):
            continue

        syspath = "/sys/block/%s" % name
        size = ReadSize(syspath)

        f = open("%s/removable" % syspath)
        try:
            removable = int(f.read().strip())
        finally:
            # always release the handle, even if parsing fails
            f.close()

        if removable and not opts.removable_ok:
            continue

        dev = ReadDev(syspath)
        CheckSysDev(name, dev)
        # a successful partition table re-read means the disk is free
        inuse = not CheckReread(name)

        # Enumerate partitions of the block device: they show up as
        # sub-directories named after the disk (sda -> sda1, sda2, ...)
        partitions = []
        for partname in os.listdir(syspath):
            if not partname.startswith(name):
                continue
            partpath = "%s/%s" % (syspath, partname)
            partdev = ReadDev(partpath)
            partsize = ReadSize(partpath)
            CheckSysDev(partname, partdev)
            partitions.append((partname, partsize, partdev))

        # os.listdir gives no ordering guarantee; sort for stable output
        partitions.sort()
        dlist.append((name, size, dev, partitions, inuse))

    dlist.sort()
    return dlist
415 """Reads /proc/mounts and computes the mountpoint-devnum mapping.
417 This function reads /proc/mounts, finds the mounted filesystems
418 (excepting a hard-coded blacklist of network and virtual
419 filesystems) and does a stat on these mountpoints. The st_dev number
420 of the results is memorised for later matching against the
424 a mountpoint: device number dictionary
427 mountlines = ReadFile("/proc/mounts").splitlines()
429 for line in mountlines:
430 device, mountpoint, fstype, rest = line.split(None, 3)
432 if fstype in ["nfs", "nfs4", "autofs", "tmpfs", "proc", "sysfs"]:
435 dev = os.stat(mountpoint).st_dev
437 # this should be a fairly rare error, since we are blacklisting
438 # network filesystems; with this in mind, we'll ignore it,
439 # since the rereadpt check catches in-use filesystems,
440 # and this is used for disk information only
441 print >> sys.stderr, ("Can't stat mountpoint '%s': %s" %
443 print >> sys.stderr, "Ignoring."
445 mounts[dev] = mountpoint
def DevInfo(name, dev, mountinfo):
    """Computes miscellaneous information about a block device.

    Args:
      name: the device name, e.g. sda
      dev: the device number, used as the lookup key into mountinfo
      mountinfo: a dictionary mapping device numbers to mount points

    Returns:
      (mpath, whatvg, fileinfo), where
        mpath is the mount path where this device is mounted or None
        whatvg is the result of the ReadPV function
        fileinfo is the first 45 characters of the output of file -bs
          on the device, or an "<error: ...>" text if file failed

    """
    # None when the device is not a key of the mount table
    mpath = mountinfo.get(dev)

    whatvg = ReadPV(name)

    result = ExecCommand("file -bs /dev/%s" % name)
    if result.failed:
        fileinfo = "<error: %s>" % result.stderr
    else:
        # only overwrite on success, so the error text is not lost
        fileinfo = result.stdout[:45]
    return mpath, whatvg, fileinfo
def ShowDiskInfo(opts):
    """Shows a nicely formatted block device list for this system.

    This function shows the user a table with the information gathered
    by the other functions defined, in order to help the user make a
    choice about which disks should be allocated to our volume group.

    Args:
      opts: the parsed command line options, passed on to GetDiskList

    """
    mounts = GetMountInfo()
    dlist = GetDiskList(opts)

    print("------- Disk information -------")
    print("%5s %7s %4s %5s %-10s %s" %
          ("Name", "Size[M]", "Used", "Mount", "LVM?", "Info"))

    # Flatten the [(disk, [partition,...]), ...] list; the in-use state
    # is only known for whole disks, so partitions get an empty cell.
    flatlist = []
    for name, size, dev, parts, inuse in dlist:
        # NOTE(review): the yes/no labels are a reconstruction; confirm
        # against the intended table wording.
        if inuse:
            str_inuse = "yes"
        else:
            str_inuse = "no"
        flatlist.append((name, size, dev, str_inuse))
        for partname, partsize, partdev in parts:
            flatlist.append((partname, partsize, partdev, ""))

    for name, size, dev, in_use in flatlist:
        mp, vgname, fileinfo = DevInfo(name, dev, mounts)
        if mp is None:
            # not mounted
            mp = "-"
        # The LVM value follows the ReadPV convention: None means not a
        # PV, "" means a PV outside any volume group.
        # NOTE(review): the "-" and "yes,free" labels are reconstructed.
        if vgname is None:
            lvminfo = "-"
        elif vgname == "":
            lvminfo = "yes,free"
        else:
            lvminfo = "in %s" % vgname

        print("%-5s %7.2f %-4s %-5s %-10s %s" %
              (name, float(size) / 1024 / 1024, in_use, mp, lvminfo, fileinfo))
def CheckReread(name):
    """Check whether the partition table of a block device can be re-read.

    Uses blockdev to reread the partition table of a block device. A
    device whose partition table cannot be re-read is considered to be
    in use; see the discussion in GetDiskList about the meaning of
    'in use'. The operation is tried up to three times.

    Args:
      name: the device name, e.g. sda

    Returns:
      boolean: True if the re-read succeeded (the device is NOT in
        use), False if every attempt failed (the device is in use)

    """
    import time  # local import: only the retry pause needs it

    attempts = 3
    for attempt in range(attempts):
        result = ExecCommand("blockdev --rereadpt /dev/%s" % name)
        if not result.failed:
            return True
        if attempt < attempts - 1:
            # pause before retrying; no point in waiting after the last try
            time.sleep(2)
    return False
541 """Wipes a block device.
543 This function wipes a block device, by clearing and re-reading the
544 partition table. If not successful, it writes back the old partition
545 data, and leaves the cleanup to the user.
548 the device name (e.g. sda)
551 if not CheckReread(name):
552 raise OperationalError("CRITICAL: disk %s you selected seems to be in"
553 " use. ABORTING!" % name)
555 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
556 olddata = os.read(fd, 512)
557 if len(olddata) != 512:
558 raise OperationalError("CRITICAL: Can't read partition table information"
559 " from /dev/%s (needed 512 bytes, got %d" %
560 (name, len(olddata)))
563 bytes_written = os.write(fd, newdata)
565 if bytes_written != 512:
566 raise OperationalError("CRITICAL: Can't write partition table information"
567 " to /dev/%s (tried to write 512 bytes, written"
568 " %d. I don't know how to cleanup. Sorry." %
569 (name, bytes_written))
571 if not CheckReread(name):
572 fd = os.open("/dev/%s" % name, os.O_RDWR | os.O_SYNC)
573 os.write(fd, olddata)
575 raise OperationalError("CRITICAL: disk %s which I have just wiped cannot"
576 " reread partition table. Most likely, it is"
577 " in use. You have to clean after this yourself."
578 " I tried to restore the old partition table,"
579 " but I cannot guarantee nothing has broken." %
def PartitionDisk(name):
    """Partitions a disk.

    This function creates a single partition spanning the entire disk,
    with partition type 8e, by feeding a one-line description to sfdisk.

    Args:
      name: the device name, e.g. sda

    Raises:
      OperationalError: if sfdisk reports a failure

    """
    result = ExecCommand(
        'echo ,,8e, | sfdisk /dev/%s' % name)
    if result.failed:
        raise OperationalError("CRITICAL: disk %s which I have just partitioned"
                               " cannot reread its partition table, or there"
                               " is some other sfdisk error. Likely, it is in"
                               " use. You have to clean this yourself. Error"
                               " message from sfdisk: %s" %
                               (name, result.output))
def CreatePVOnDisk(name):
    """Creates a physical volume on a block device.

    This function creates a physical volume on the first partition of
    the given disk, overriding all warnings. So it can wipe existing PVs
    and PVs which are in a VG.

    Args:
      name: the device name, e.g. sda (the PV is created on /dev/<name>1)

    Raises:
      OperationalError: if pvcreate reports a failure

    """
    result = ExecCommand("pvcreate -yff /dev/%s1 " % name)
    if result.failed:
        raise OperationalError("I cannot create a physical volume on"
                               " partition /dev/%s1. Error message: %s."
                               " Please clean up yourself." %
                               (name, result.output))
def CreateVG(vgname, disks):
    """Creates the volume group.

    This function creates a volume group named `vgname` on the first
    partition of each of the disks given as parameters. The physical
    extent size is set to 64MB.

    Args:
      vgname: the name of the volume group to create
      disks: a list of disk names, e.g. ['sda','sdb']

    Raises:
      OperationalError: if vgcreate reports a failure

    """
    # each PV lives on partition 1 of its disk
    pnames = ["'/dev/%s1'" % disk for disk in disks]
    result = ExecCommand("vgcreate -s 64MB '%s' %s" % (vgname, " ".join(pnames)))
    if result.failed:
        raise OperationalError("I cannot create the volume group %s from"
                               " disks %s. Error message: %s. Please clean up"
                               " yourself." %
                               (vgname, " ".join(disks), result.output))
def ValidateDiskList(options):
    """Validates or computes the disk list for create.

    This function either computes the available disk list (if the user
    gave the --alldisks option), or validates the user-given disk list
    (from the --disks option) such that all given disks are present and
    not in use.

    Args:
      options: the parsed command line options; alldisks and disks are
        read here, and the object is passed on to GetDiskList

    Returns:
      a list of disk names, e.g. ['sda', 'sdb']

    Raises:
      PrereqError: if the system has no candidate disks, or none of
        them is free
      ParameterError: if a requested disk is in use or unknown, or if
        neither --alldisks nor --disks was given

    """
    sysdisks = GetDiskList(options)
    if not sysdisks:
        raise PrereqError("no disks found (I looked for"
                          " non-removable block devices).")

    # split the detected disks by their in-use flag
    sysd_free = []
    sysd_used = []
    for name, size, dev, part, used in sysdisks:
        if used:
            sysd_used.append(name)
        else:
            sysd_free.append(name)

    if not sysd_free:
        raise PrereqError("no free disks found! (%d in-use disks)" %
                          len(sysd_used))

    if options.alldisks:
        disklist = sysd_free
    elif options.disks:
        disklist = options.disks.split(",")
        for name in disklist:
            if name in sysd_used:
                raise ParameterError("disk %s is in use, cannot wipe!" % name)
            if name not in sysd_free:
                raise ParameterError("cannot find disk %s!" % name)
    else:
        raise ParameterError("Please use either --alldisks or --disks!")

    return disklist
686 """Actual main routine."""
690 options, args = ParseOptions()
691 vgname = options.vgname
692 command = args.pop(0)
693 if command == "diskinfo":
694 ShowDiskInfo(options)
696 if command != "create":
699 exists, lv_count, vg_size, vg_free = CheckVGExists(vgname)
701 raise PrereqError("It seems volume group '%s' already exists:\n"
702 " LV count: %s, size: %s, free: %s." %
703 (vgname, lv_count, vg_size, vg_free))
706 disklist = ValidateDiskList(options)
708 for disk in disklist:
711 for disk in disklist:
713 CreateVG(vgname, disklist)
715 status, lv_count, size, free = CheckVGExists(vgname)
717 print "Done! %s: size %s GiB, disks: %s" % (vgname, size,
720 raise OperationalError("Although everything seemed ok, the volume"
721 " group did not get created.")
725 """application entry point.
727 This is just a wrapper over BootStrap, to handle our own exceptions.
732 except PrereqError, err:
733 print >> sys.stderr, "The prerequisites for running this tool are not met."
734 print >> sys.stderr, ("Please make sure you followed all the steps in"
735 " the build document.")
736 print >> sys.stderr, "Description: %s" % str(err)
738 except SysconfigError, err:
739 print >> sys.stderr, ("This system's configuration seems wrong, at"
740 " least is not what I expect.")
741 print >> sys.stderr, ("Please check that the installation didn't fail"
743 print >> sys.stderr, "Description: %s" % str(err)
745 except ParameterError, err:
746 print >> sys.stderr, ("Some parameters you gave to the program or the"
747 " invocation is wrong. ")
748 print >> sys.stderr, "Description: %s" % str(err)
750 except OperationalError, err:
751 print >> sys.stderr, ("A serious error has happened while modifying"
752 " the system's configuration.")
753 print >> sys.stderr, ("Please review the error message below and make"
754 " sure you clean up yourself.")
755 print >> sys.stderr, ("It is most likely that the system configuration"
756 " has been partially altered.")
757 print >> sys.stderr, str(err)
759 except ProgrammingError, err:
760 print >> sys.stderr, ("Internal application error. Please signal this"
761 " to xencluster-team.")
762 print >> sys.stderr, "Error description: %s" % str(err)
765 print >> sys.stderr, "Unhandled application error: %s" % err
767 except (IOError, OSError), err:
768 print >> sys.stderr, "I/O error detected, please report."
769 print >> sys.stderr, "Description: %s" % str(err)
# Run the entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()