# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
"""Program which configures LVM on the Ganeti nodes.

This program wipes disks and creates a volume group on top of them. It
can also show disk information to help you decide which disks you want
to wipe.

The error handling is done by raising our own exceptions from most of
the functions; these exceptions are then handled globally in the main()
function. The exceptions that each function can raise are not
documented individually, since almost every error path ends in a
raise.

Another two exceptions that are handled globally are IOError and
OSError. The idea behind this is, since we run as root, we should
usually not get these errors, but if we do it's most probably a system
error, so they should be handled and the user instructed to report
them.
"""
import optparse
import os
import sys
import time

from ganeti import constants
from ganeti.utils import RunCmd
# Help text shown by optparse and on command line errors. The option
# spellings listed here must match the ones registered with the parser
# (the volume group option is registered as --vg-name).
USAGE = ("\tlvmstrap diskinfo\n"
         "\tlvmstrap [--vg-name=NAME] [--allow-removable]"
         " { --alldisks | --disks DISKLIST }"
         " create")

# Module-wide verbosity switch; set from the -v/--verbose command line
# option once the options have been parsed.
verbose_flag = False
class Error(Exception):
  """Generic exception; base class of all errors raised by this program."""
class ProgrammingError(Error):
  """Exception denoting invalid assumptions in programming.

  This should catch sysfs tree changes, or otherwise incorrect
  assumptions about the contents of the /sys/block/... directories.
  """
class SysconfigError(Error):
  """Exception denoting invalid system configuration.

  If the system configuration is somehow wrong (e.g. /dev files
  missing, or having mismatched major/minor numbers relative to
  /sys/block devices), this exception will be raised.

  This should usually mean that the installation of the Xen node
  failed at some step.
  """
class PrereqError(Error):
  """Exception denoting invalid prerequisites.

  If the node does not meet the requirements for cluster membership, this
  exception will be raised. Things like wrong kernel version, or no
  free disks, etc. belong here.

  This should usually mean that the build steps for the Xen node were
  not followed correctly.
  """
class OperationalError(Error):
  """Exception denoting actual errors.

  Errors during the bootstrapping are signaled using this exception.
  """
class ParameterError(Error):
  """Exception denoting invalid input from user.

  Wrong disks given as parameters will be signaled using this
  exception.
  """
def Usage():
  """Shows program usage information and exits the program.

  The usage text is written to stderr; the function never returns,
  the process exits with status 2.
  """
  sys.stderr.write("Usage:\n")
  sys.stderr.write("%s\n" % USAGE)
  sys.exit(2)
def ParseOptions():
  """Parses the command line options.

  In case of command line errors, it will show the usage and exit the
  program. As a side effect, the module-level verbose_flag is set from
  the -v/--verbose option.

  Returns:
    (options, args), as returned by OptionParser.parse_args

  """
  global verbose_flag

  parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                 version="%%prog (ganeti) %s" %
                                 constants.RELEASE_VERSION)

  parser.add_option("--alldisks", dest="alldisks",
                    help="erase ALL disks", action="store_true",
                    default=False)
  parser.add_option("-d", "--disks", dest="disks",
                    help="Choose disks (e.g. hda,hdg)",
                    metavar="DISKLIST")
  parser.add_option("-v", "--verbose",
                    action="store_true", dest="verbose", default=False,
                    help="print command execution messages to stdout")
  parser.add_option("-r", "--allow-removable",
                    action="store_true", dest="removable_ok", default=False,
                    help="allow and use removable devices too")
  # --vgname is accepted as an alias because that is the spelling the
  # usage text has historically advertised.
  parser.add_option("-g", "--vg-name", "--vgname", type="string",
                    dest="vgname", default="xenvg", metavar="NAME",
                    help="the volume group to be created [default: xenvg]")

  options, args = parser.parse_args()
  # Exactly one positional argument (the command) is expected.
  if len(args) != 1:
    Usage()

  verbose_flag = options.verbose

  return options, args
def ExecCommand(command):
  """Executes a command.

  This is just a wrapper around RunCmd, with the difference that if
  the command line argument -v has been given, it will print the
  command line and the command output on stdout.

  Args:
    command: the command line to be executed

  Returns:
    the result object returned by RunCmd; the callers in this file use
    its failed, stdout, stderr and output attributes

  """
  if verbose_flag:
    print(command)
  result = RunCmd(command)
  if verbose_flag:
    print(result.output)
  return result
def CheckPrereq():
  """Check the prerequisites of this program.

  It checks that the program runs as root on a Linux 2.6 kernel, that
  /sys and /proc are mounted, that /sys/block is a directory and that
  /proc/mounts exists.

  Raises:
    PrereqError: a requirement the administrator can fix is not met
    SysconfigError: the /sys or /proc layout is not the expected one

  """
  if os.getuid() != 0:
    raise PrereqError("This tool runs as root only. Really.")

  # os.uname() is (sysname, nodename, release, version, machine); only
  # the system name and the kernel release are of interest here.
  uname = os.uname()
  osname, release = uname[0], uname[2]
  if osname != 'Linux':
    raise PrereqError("This tool only runs on Linux"
                      " (detected OS: %s)." % osname)

  if not release.startswith("2.6."):
    raise PrereqError("Wrong major kernel version (detected %s, needs"
                      " 2.6.*)" % release)

  if not os.path.ismount("/sys"):
    raise PrereqError("Can't find a filesystem mounted at /sys."
                      " Please mount /sys.")

  if not os.path.isdir("/sys/block"):
    raise SysconfigError("Can't find /sys/block directory. Has the"
                         " layout of /sys changed?")

  if not os.path.ismount("/proc"):
    raise PrereqError("Can't find a filesystem mounted at /proc."
                      " Please mount /proc.")

  if not os.path.exists("/proc/mounts"):
    raise SysconfigError("Can't find /proc/mounts")
def CheckVGExists(vgname):
  """Checks to see if a volume group exists.

  Args:
    vgname: the volume group name

  Returns:
    a four-tuple (exists, lv_count, vg_size, vg_free), where:
      exists: True if the volume exists, otherwise False; if False,
        all other members of the tuple are None
      lv_count: The number of logical volumes in the volume group
      vg_size: The total size of the volume group (in gibibytes)
      vg_free: The available space in the volume group
    The last three values are the strings printed by vgs; they are
    not converted to numbers.

  Raises:
    PrereqError: vgs succeeded but its output does not consist of
      exactly three fields

  """
  result = ExecCommand("vgs --nohead -o lv_count,vg_size,vg_free"
                       " --nosuffix --units g"
                       " --ignorelockingfailure %s" % vgname)
  if result.failed:
    # vgs failing is taken to mean that the volume group does not exist
    return False, None, None, None

  try:
    lv_count, vg_size, vg_free = result.stdout.strip().split()
  except ValueError:
    # This means the output of vgs can't be parsed
    raise PrereqError("cannot parse output of vgs (%s)" % result.stdout)

  return True, lv_count, vg_size, vg_free
def CheckSysDev(name, devnum):
  """Checks consistency between /sys and /dev trees.

  In /sys/block/<name>/dev and /sys/block/<name>/<part>/dev are the
  kernel-known device numbers. The /dev/<name> block/char devices are
  created by userspace and thus could differ from the kernel
  view. This function checks the consistency between the device number
  read from /sys and the actual device number in /dev.

  Note that since the system could be using udev which removes and
  recreates the device nodes on partition table rescan, we need to do
  some retries here. Since we only do a stat, we can afford to do many
  short retries.

  Args:
    name: the device name, e.g. 'sda'
    devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3

  Returns:
    None; failure of the check is signalled by raising a
    SysconfigError exception

  """
  path = "/dev/%s" % name
  # Wait up to 40 * 0.25 = 10 seconds for the device node to show up
  for _ in range(40):
    if os.path.exists(path):
      break
    time.sleep(0.250)
  else:
    raise SysconfigError("the device file %s does not exist, but the block"
                         " device exists in the /sys/block tree" % path)

  rdev = os.stat(path).st_rdev
  if devnum != rdev:
    # Report both numbers in the same major:minor notation
    raise SysconfigError("For device %s, the major:minor in /dev is %d:%d"
                         " while the major:minor in sysfs is %d:%d" %
                         (path, os.major(rdev), os.minor(rdev),
                          os.major(devnum), os.minor(devnum)))
def ReadDev(syspath):
  """Reads the device number from a sysfs path.

  The device number is given in sysfs under a block device directory
  in a file named 'dev' which contains major:minor (in ASCII). This
  function reads that file and converts the major:minor pair to a dev
  number.

  Args:
    syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

  Returns:
    the device number, as computed by os.makedev

  Raises:
    ProgrammingError: the 'dev' file is missing or does not contain a
      major:minor pair

  """
  devfile = "%s/dev" % syspath
  if not os.path.exists(devfile):
    raise ProgrammingError("Invalid path passed to ReadDev: %s" % syspath)

  fobj = open(devfile)
  try:
    data = fobj.read().strip()
  finally:
    # close the file even if the read fails
    fobj.close()

  try:
    major, minor = [int(part) for part in data.split(":", 1)]
  except ValueError:
    # wrong number of fields or non-numeric content
    raise ProgrammingError("Can't parse device number '%s' read from %s" %
                           (data, devfile))

  return os.makedev(major, minor)
def ReadSize(syspath):
  """Reads the size from a sysfs path.

  The size is given in sysfs under a block device directory in a file
  named 'size' which contains the number of sectors (in ASCII). This
  function reads that file and converts the number in sectors to the
  size in bytes, counting 512 bytes per sector.

  Args:
    syspath: the path to a block device dir in sysfs, e.g. /sys/block/sda

  Returns:
    the device size in bytes

  Raises:
    ProgrammingError: the 'size' file is missing or not numeric

  """
  sizefile = "%s/size" % syspath
  if not os.path.exists(sizefile):
    raise ProgrammingError("Invalid path passed to ReadSize: %s" % syspath)

  fobj = open(sizefile)
  try:
    data = fobj.read().strip()
  finally:
    # close the file even if the read fails
    fobj.close()

  try:
    sectors = int(data)
  except ValueError:
    raise ProgrammingError("Can't parse size '%s' read from %s" %
                           (data, sizefile))

  return 512 * sectors
def ReadPV(name):
  """Reads physical volume information.

  This function tries to see if a block device is a physical volume.

  Args:
    name: the device name (e.g. sda)

  Returns:
    The name of the volume group to which this PV belongs, or
    "" if this PV is not in use, or
    None if this is not a PV (pvdisplay failed or printed something
    which is not colon-separated)

  """
  result = ExecCommand("pvdisplay -c /dev/%s" % name)
  if result.failed:
    return None

  # The volume group name is the second colon-separated field
  fields = result.stdout.strip().split(":")
  if len(fields) < 2:
    return None
  return fields[1]
def GetDiskList(opts):
  """Computes the block device list for this system.

  This function examines the /sys/block tree and using information
  therein, computes the status of the block device. Only devices whose
  name starts with hd, sd or ubd are considered; removable devices are
  skipped unless opts.removable_ok is set.

  Args:
    opts: the parsed command line options (removable_ok is read)

  Returns:
    [(name, size, dev, partitions, inuse), ...]
  where:
    name is the block device name (e.g. sda)
    size the size in bytes
    dev the device number (e.g. 8704 for hdg)
    partitions is [(name, size, dev), ...] mirroring the disk list data
    inuse is a boolean showing the in-use status of the disk, computed as the
      possibility of re-reading the partition table (the meaning of the
      operation varies with the kernel version, but is usually accurate;
      a mounted disk/partition or swap-area or PV with active LVs on it
      will be in use)

  """
  dlist = []
  for name in os.listdir("/sys/block"):
    if not name.startswith(("hd", "sd", "ubd")):
      continue

    syspath = "/sys/block/%s" % name
    size = ReadSize(syspath)

    fobj = open("%s/removable" % syspath)
    try:
      removable = int(fobj.read().strip())
    finally:
      fobj.close()

    if removable and not opts.removable_ok:
      continue

    dev = ReadDev(syspath)
    CheckSysDev(name, dev)
    inuse = not CheckReread(name)

    # Enumerate partitions of the block device; they show up as
    # sub-directories whose name starts with the disk name
    partitions = []
    for partname in os.listdir(syspath):
      if not partname.startswith(name):
        continue
      partpath = "%s/%s" % (syspath, partname)
      partdev = ReadDev(partpath)
      partsize = ReadSize(partpath)
      CheckSysDev(partname, partdev)
      partitions.append((partname, partsize, partdev))
    # os.listdir gives no ordering guarantee, so sort for stable output
    partitions.sort()
    dlist.append((name, size, dev, partitions, inuse))

  dlist.sort()
  return dlist
def GetMountInfo():
  """Reads /proc/mounts and computes the mountpoint-devnum mapping.

  This function reads /proc/mounts, finds the mounted filesystems
  (excepting a hard-coded blacklist of network and virtual
  filesystems) and does a stat on these mountpoints. The st_dev number
  of the results is memorised for later matching against the
  device numbers of the block devices.

  Returns:
    a dictionary keyed by device number (st_dev of the mountpoint),
    whose values are the mountpoint paths

  """
  fobj = open("/proc/mounts", "r")
  try:
    mountlines = fobj.readlines()
  finally:
    fobj.close()

  mounts = {}
  for line in mountlines:
    # fields: device, mountpoint, fstype, remaining options
    _, mountpoint, fstype, _ = line.split(None, 3)
    # fs type blacklist
    if fstype in ["nfs", "nfs4", "autofs", "tmpfs", "proc", "sysfs"]:
      continue
    try:
      dev = os.stat(mountpoint).st_dev
    except OSError as err:
      # this should be a fairly rare error, since we are blacklisting
      # network filesystems; with this in mind, we'll ignore it,
      # since the rereadpt check catches in-use filesystems,
      # and this is used for disk information only
      sys.stderr.write("Can't stat mountpoint '%s': %s\n" %
                       (mountpoint, err))
      sys.stderr.write("Ignoring.\n")
      continue
    mounts[dev] = mountpoint

  return mounts
def DevInfo(name, dev, mountinfo):
  """Computes miscellaneous information about a block device.

  Args:
    name: the device name, e.g. sda
    dev: the device number of the device
    mountinfo: a device number: mountpoint dictionary

  Returns:
    (mpath, whatvg, fileinfo), where
      mpath is the mount path where this device is mounted or None
      whatvg is the result of the ReadPV function
      fileinfo is the output of file -bs on the device (at most 45
        characters), or an "<error: ...>" string if file failed

  """
  mpath = mountinfo.get(dev)

  whatvg = ReadPV(name)

  result = ExecCommand("file -bs /dev/%s" % name)
  if result.failed:
    fileinfo = "<error: %s>" % result.stderr
  else:
    # Without this else branch the error text above would always be
    # overwritten by the (truncated) stdout of the failed command.
    fileinfo = result.stdout[:45]
  return mpath, whatvg, fileinfo
def ShowDiskInfo(opts):
  """Shows a nicely formatted block device list for this system.

  This function shows the user a table with the information gathered
  by the other functions defined, in order to help the user make a
  choice about which disks should be allocated to our volume group.

  Args:
    opts: the parsed command line options, passed on to GetDiskList

  """
  mounts = GetMountInfo()
  dlist = GetDiskList(opts)

  print("------- Disk information -------")
  print("%5s %7s %4s %5s %-10s %s" %
        ("Name", "Size[M]", "Used", "Mount", "LVM?", "Info"))

  # Flatten the [(disk, [partition,...]), ...] list; the in-use status
  # is only known for whole disks, partitions get an empty column
  flatlist = []
  for name, size, dev, parts, inuse in dlist:
    if inuse:
      str_inuse = "yes"
    else:
      str_inuse = "no"
    flatlist.append((name, size, dev, str_inuse))
    for partname, partsize, partdev in parts:
      flatlist.append((partname, partsize, partdev, ""))

  for name, size, dev, in_use in flatlist:
    mp, vgname, fileinfo = DevInfo(name, dev, mounts)
    if mp is None:
      mp = "-"
    if vgname is None:
      lvminfo = "-"
    elif vgname == "":
      lvminfo = "yes,free"
    else:
      lvminfo = "in %s" % vgname

    if len(name) > 3:
      # Indent partitions
      name = " %s" % name
    print("%-5s %7.2f %-4s %-5s %-10s %s" %
          (name, float(size) / 1024 / 1024, in_use, mp, lvminfo, fileinfo))
def CheckReread(name):
  """Check to see if a block device is in use.

  Uses blockdev to reread the partition table of a block device, and
  thus compute the in-use status. See the discussion in GetDiskList
  about the meaning of 'in use'. The reread is attempted up to three
  times.

  Args:
    name: the device name (e.g. sda)

  Returns:
    boolean, True if the partition table could be re-read (i.e. the
    device is NOT in use), False if all attempts failed

  """
  for _ in range(3):
    result = ExecCommand("blockdev --rereadpt /dev/%s" % name)
    if not result.failed:
      break
    time.sleep(2)

  return not result.failed
def WipeDisk(name):
  """Wipes a block device.

  This function wipes a block device, by clearing and re-reading the
  partition table. If not successful, it writes back the old partition
  data, and leaves the cleanup to the user.

  Args:
    name: the device name (e.g. sda)

  Raises:
    OperationalError: the disk is in use, or the first sector could not
      be read or written, or the partition table could not be re-read
      after the wipe

  """
  if not CheckReread(name):
    raise OperationalError("CRITICAL: disk %s you selected seems to be in"
                           " use. ABORTING!" % name)

  devpath = "/dev/%s" % name
  fd = os.open(devpath, os.O_RDWR | os.O_SYNC)
  try:
    # Save the first sector so that it can be restored on failure
    olddata = os.read(fd, 512)
    if len(olddata) != 512:
      raise OperationalError("CRITICAL: Can't read partition table"
                             " information from /dev/%s (needed 512 bytes,"
                             " got %d)" % (name, len(olddata)))
    newdata = b"\0" * 512
    os.lseek(fd, 0, 0)
    bytes_written = os.write(fd, newdata)
  finally:
    # never leak the descriptor, whatever happened above
    os.close(fd)

  if bytes_written != 512:
    raise OperationalError("CRITICAL: Can't write partition table"
                           " information to /dev/%s (tried to write 512"
                           " bytes, written %d). I don't know how to"
                           " cleanup. Sorry." % (name, bytes_written))

  if not CheckReread(name):
    # Best-effort restore of the saved sector before giving up
    fd = os.open(devpath, os.O_RDWR | os.O_SYNC)
    try:
      os.write(fd, olddata)
    finally:
      os.close(fd)
    raise OperationalError("CRITICAL: disk %s which I have just wiped cannot"
                           " reread partition table. Most likely, it is"
                           " in use. You have to clean after this yourself."
                           " I tried to restore the old partition table,"
                           " but I cannot guarantee nothing has broken." %
                           name)
def PartitionDisk(name):
  """Partitions a disk.

  This function creates a single partition spanning the entire disk,
  by means of sfdisk; the partition type passed to sfdisk is 8e.

  Args:
    name: the device name, e.g. sda

  Raises:
    OperationalError: sfdisk reported a failure

  """
  result = ExecCommand('echo ,,8e, | sfdisk /dev/%s' % name)
  if result.failed:
    raise OperationalError("CRITICAL: disk %s which I have just partitioned"
                           " cannot reread its partition table, or there"
                           " is some other sfdisk error. Likely, it is in"
                           " use. You have to clean this yourself. Error"
                           " message from sfdisk: %s" %
                           (name, result.output))
def CreatePVOnDisk(name):
  """Creates a physical volume on a block device.

  This function creates a physical volume on the first partition of a
  block device, overriding all warnings. So it can wipe existing PVs
  and PVs which are in a VG.

  Args:
    name: the device name, e.g. sda

  Raises:
    OperationalError: pvcreate reported a failure

  """
  result = ExecCommand("pvcreate -yff /dev/%s1 " % name)
  if result.failed:
    raise OperationalError("I cannot create a physical volume on"
                           " partition /dev/%s1. Error message: %s."
                           " Please clean up yourself." %
                           (name, result.output))
def CreateVG(vgname, disks):
  """Creates the volume group.

  This function creates a volume group named `vgname` on the first
  partition of each of the disks given as parameters. The physical
  extent size is set to 64MB.

  Args:
    vgname: the name of the volume group to create
    disks: a list of disk names, e.g. ['sda','sdb']

  Raises:
    OperationalError: vgcreate reported a failure

  """
  pnames = ["'/dev/%s1'" % disk for disk in disks]
  result = ExecCommand("vgcreate -s 64MB '%s' %s" %
                       (vgname, " ".join(pnames)))
  if result.failed:
    raise OperationalError("I cannot create the volume group %s from"
                           " disks %s. Error message: %s. Please clean up"
                           " yourself." %
                           (vgname, " ".join(disks), result.output))
def ValidateDiskList(options):
  """Validates or computes the disk list for create.

  This function either computes the available disk list (if the user
  gave --alldisks option), or validates the user-given disk list (by
  using the --disks option) such that all given disks are present and
  not in use.

  Args:
    options: the options returned from OptParser.parse_options

  Returns:
    a list of disk names, e.g. ['sda', 'sdb']

  Raises:
    PrereqError: the system has no candidate disks, or no free ones
    ParameterError: a requested disk is in use or unknown, or neither
      --alldisks nor --disks was given

  """
  sysdisks = GetDiskList(options)
  if not sysdisks:
    raise PrereqError("no disks found (I looked for"
                      " non-removable block devices).")

  # Split the disk names by in-use status
  sysd_free = []
  sysd_used = []
  for name, _, _, _, used in sysdisks:
    if used:
      sysd_used.append(name)
    else:
      sysd_free.append(name)

  if not sysd_free:
    raise PrereqError("no free disks found! (%d in-use disks)" %
                      len(sysd_used))

  if options.alldisks:
    disklist = sysd_free
  elif options.disks:
    disklist = options.disks.split(",")
    for name in disklist:
      if name in sysd_used:
        raise ParameterError("disk %s is in use, cannot wipe!" % name)
      if name not in sysd_free:
        raise ParameterError("cannot find disk %s!" % name)
  else:
    raise ParameterError("Please use either --alldisks or --disks!")

  return disklist
def BootStrap():
  """Actual main routine.

  Checks the prerequisites, parses the command line and then either
  shows the disk information (command "diskinfo") or wipes and
  partitions the selected disks and creates the volume group on them
  (command "create"). Any other command shows the usage and exits.

  Raises:
    PrereqError: the volume group already exists
    OperationalError: the volume group is not there after creation

  """
  CheckPrereq()

  options, args = ParseOptions()
  vgname = options.vgname
  command = args.pop(0)
  if command == "diskinfo":
    ShowDiskInfo(options)
    return
  if command != "create":
    Usage()

  exists, lv_count, vg_size, vg_free = CheckVGExists(vgname)
  if exists:
    raise PrereqError("It seems volume group '%s' already exists:\n"
                      "  LV count: %s, size: %s, free: %s." %
                      (vgname, lv_count, vg_size, vg_free))

  disklist = ValidateDiskList(options)

  # All disks are wiped and partitioned before any PV is created
  for disk in disklist:
    WipeDisk(disk)
    PartitionDisk(disk)
  for disk in disklist:
    CreatePVOnDisk(disk)
  CreateVG(vgname, disklist)

  # Verify that the volume group is really there
  status, lv_count, size, _ = CheckVGExists(vgname)
  if status:
    print("Done! %s: size %s GiB, disks: %s" %
          (vgname, size, ",".join(disklist)))
  else:
    raise OperationalError("Although everything seemed ok, the volume"
                           " group did not get created.")
def main():
  """Application entry point.

  This is just a wrapper over BootStrap, to handle our own exceptions
  and the IOError/OSError ones: an explanation is written to stderr
  and the process exits with status 1.
  """
  def _Fail(*lines):
    """Writes the given lines to stderr and exits with status 1."""
    for line in lines:
      sys.stderr.write("%s\n" % line)
    sys.exit(1)

  try:
    BootStrap()
  except PrereqError as err:
    _Fail("The prerequisites for running this tool are not met.",
          "Please make sure you followed all the steps in"
          " the build document.",
          "Description: %s" % str(err))
  except SysconfigError as err:
    _Fail("This system's configuration seems wrong, at"
          " least is not what I expect.",
          "Please check that the installation didn't fail"
          " at some step.",
          "Description: %s" % str(err))
  except ParameterError as err:
    _Fail("Some parameters you gave to the program or the"
          " invocation is wrong. ",
          "Description: %s" % str(err))
  except OperationalError as err:
    _Fail("A serious error has happened while modifying"
          " the system's configuration.",
          "Please review the error message below and make"
          " sure you clean up yourself.",
          "It is most likely that the system configuration"
          " has been partially altered.",
          str(err))
  except ProgrammingError as err:
    _Fail("Internal application error. Please signal this"
          " to xencluster-team.",
          "Error description: %s" % str(err))
  except Error as err:
    # catch-all for our own exceptions not handled above
    _Fail("Unhandled application error: %s" % err)
  except (IOError, OSError) as err:
    _Fail("I/O error detected, please report.",
          "Description: %s" % str(err))
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
  main()