4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
35 from ganeti import logger
36 from ganeti import errors
37 from ganeti import utils
38 from ganeti import ssh
39 from ganeti import hypervisor
40 from ganeti import constants
41 from ganeti import bdev
42 from ganeti import objects
43 from ganeti import ssconf
def _GetSshRunner():
  """Return a fresh SshRunner for intra-cluster ssh commands."""
  return ssh.SshRunner()
51 """Activate local node as master node.
53 There are two needed steps for this:
54 - run the master script
55 - register the cron script
58 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
61 logger.Error("could not activate cluster interface with command %s,"
62 " error: '%s'" % (result.cmd, result.output))
69 """Deactivate this node as master.
71 This runs the master stop script.
74 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
77 logger.Error("could not deactivate cluster interface with command %s,"
78 " error: '%s'" % (result.cmd, result.output))
84 def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
85 """Joins this node to the cluster.
87 This does the following:
88 - updates the hostkeys of the machine (rsa and dsa)
89 - adds the ssh private key to the user
90 - adds the ssh public key to the users' authorized_keys file
93 sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
94 (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
95 (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
96 (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
97 for name, content, mode in sshd_keys:
98 utils.WriteFile(name, data=content, mode=mode)
101 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
103 except errors.OpExecError, err:
104 logger.Error("Error while processing user ssh files: %s" % err)
107 for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
108 utils.WriteFile(name, data=content, mode=0600)
110 utils.AddAuthorizedKey(auth_keys, sshpub)
112 utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
118 """Cleans up the current node and prepares it to be removed from the cluster.
121 if os.path.isdir(constants.DATA_DIR):
122 for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
123 full_name = os.path.join(constants.DATA_DIR, rel_name)
124 if os.path.isfile(full_name) and not os.path.islink(full_name):
125 utils.RemoveFile(full_name)
128 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
129 except errors.OpExecError, err:
130 logger.Error("Error while processing ssh files: %s" % err)
133 f = open(pub_key, 'r')
135 utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
139 utils.RemoveFile(priv_key)
140 utils.RemoveFile(pub_key)
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_dom0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }

    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB

  """
  outputarray = {}
  vginfo = _GetVGInfo(vgname)
  outputarray['vg_size'] = vginfo['vg_size']
  outputarray['vg_free'] = vginfo['vg_free']

  hyper = hypervisor.GetHypervisor()
  hyp_info = hyper.GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)

  # the boot id identifies the current boot of the node
  f = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    outputarray["bootid"] = f.read(128).rstrip("\n")
  finally:
    f.close()

  return outputarray
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list)

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    result['nodelist'] = {}
    # shuffle so all nodes don't contact the same targets in lock-step
    random.shuffle(what['nodelist'])
    for node in what['nodelist']:
      success, message = _GetSshRunner().VerifyNodeHostname(node)
      if not success:
        result['nodelist'][node] = message
  return result
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status:
    {'test1': ('20.06', True, True)}

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return result.output

  for line in result.stdout.splitlines():
    line = line.strip().rstrip(sep)
    name, size, attr = line.split(sep)
    # guard against short attr strings so the indexing below is safe
    if len(attr) != 6:
      attr = '------'
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    Dictionary with keys volume name and values the size of the volume.

  """
  return utils.ListVolumeGroups()
254 """List all volumes on this node.
257 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
259 "--options=lv_name,lv_size,devices,vg_name"])
261 logger.Error("Failed to list logical volumes, lvs output: %s" %
267 return dev.split('(')[0]
273 'name': line[0].strip(),
274 'size': line[1].strip(),
275 'dev': parse_dev(line[2].strip()),
276 'vg': line[3].strip(),
279 return [map_line(line.split('|')) for line in result.stdout.splitlines()]
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Returns:
    True if all of them exist, False otherwise.

  """
  for bridge in bridges_list:
    if not utils.BridgeExists(bridge):
      return False

  return True
296 def GetInstanceList():
297 """Provides a list of instances.
300 A list of all running instances on the current node
301 - instance1.example.com
302 - instance2.example.com
306 names = hypervisor.GetHypervisor().ListInstances()
307 except errors.HypervisorError, err:
308 logger.Error("error enumerating instances: %s" % str(err))
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is not None:
    output['memory'] = iinfo[2]
    output['state'] = iinfo[4]
    output['time'] = iinfo[5]

  return output
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    for name, inst_id, memory, vcpus, state, times in iinfo:
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return output
371 def AddOSToInstance(instance, os_disk, swap_disk):
372 """Add an OS to an instance.
375 instance: the instance object
376 os_disk: the instance-visible name of the os device
377 swap_disk: the instance-visible name of the swap device
380 inst_os = OSFromDisk(instance.os)
382 create_script = inst_os.create_script
384 os_device = instance.FindDisk(os_disk)
385 if os_device is None:
386 logger.Error("Can't find this device-visible name '%s'" % os_disk)
389 swap_device = instance.FindDisk(swap_disk)
390 if swap_device is None:
391 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
394 real_os_dev = _RecursiveFindBD(os_device)
395 if real_os_dev is None:
396 raise errors.BlockDeviceError("Block device '%s' is not set up" %
400 real_swap_dev = _RecursiveFindBD(swap_device)
401 if real_swap_dev is None:
402 raise errors.BlockDeviceError("Block device '%s' is not set up" %
406 logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
407 instance.name, int(time.time()))
408 if not os.path.exists(constants.LOG_OS_DIR):
409 os.mkdir(constants.LOG_OS_DIR, 0750)
411 command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
412 inst_os.path, create_script, instance.name,
413 real_os_dev.dev_path, real_swap_dev.dev_path,
416 result = utils.RunCmd(command)
418 logger.Error("os create command '%s' returned error: %s, logfile: %s,"
420 (command, result.fail_reason, logfile, result.output))
426 def RunRenameInstance(instance, old_name, os_disk, swap_disk):
427 """Run the OS rename script for an instance.
430 instance: the instance object
431 old_name: the old name of the instance
432 os_disk: the instance-visible name of the os device
433 swap_disk: the instance-visible name of the swap device
436 inst_os = OSFromDisk(instance.os)
438 script = inst_os.rename_script
440 os_device = instance.FindDisk(os_disk)
441 if os_device is None:
442 logger.Error("Can't find this device-visible name '%s'" % os_disk)
445 swap_device = instance.FindDisk(swap_disk)
446 if swap_device is None:
447 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
450 real_os_dev = _RecursiveFindBD(os_device)
451 if real_os_dev is None:
452 raise errors.BlockDeviceError("Block device '%s' is not set up" %
456 real_swap_dev = _RecursiveFindBD(swap_device)
457 if real_swap_dev is None:
458 raise errors.BlockDeviceError("Block device '%s' is not set up" %
462 logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
464 instance.name, int(time.time()))
465 if not os.path.exists(constants.LOG_OS_DIR):
466 os.mkdir(constants.LOG_OS_DIR, 0750)
468 command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
469 inst_os.path, script, old_name, instance.name,
470 real_os_dev.dev_path, real_swap_dev.dev_path,
473 result = utils.RunCmd(command)
476 logger.Error("os create command '%s' returned error: %s"
478 (command, result.fail_reason, result.output))
484 def _GetVGInfo(vg_name):
485 """Get informations about the volume group.
488 vg_name: the volume group
491 { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
493 vg_size is the total size of the volume group in MiB
494 vg_free is the free size of the volume group in MiB
495 pv_count are the number of physical disks in that vg
497 If an error occurs during gathering of data, we return the same dict
498 with keys all set to None.
501 retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
503 retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
504 "--nosuffix", "--units=m", "--separator=:", vg_name])
507 errmsg = "volume group %s not present" % vg_name
510 valarr = retval.stdout.strip().rstrip(':').split(':')
514 "vg_size": int(round(float(valarr[0]), 0)),
515 "vg_free": int(round(float(valarr[1]), 0)),
516 "pv_count": int(valarr[2]),
518 except ValueError, err:
519 logger.Error("Fail to parse vgs output: %s" % str(err))
521 logger.Error("vgs output has the wrong number of fields (expected"
522 " three): %s" % str(valarr))
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Returns:
    list of (disk, device) pairs for all disks of the instance.

  """
  block_devices = []
  for disk in instance.disks:
    device = _RecursiveFindBD(disk)
    if device is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    device.Open()
    block_devices.append((disk, device))
  return block_devices
544 def StartInstance(instance, extra_args):
545 """Start an instance.
548 instance - name of instance to start.
551 running_instances = GetInstanceList()
553 if instance.name in running_instances:
556 block_devices = _GatherBlockDevs(instance)
557 hyper = hypervisor.GetHypervisor()
560 hyper.StartInstance(instance, block_devices, extra_args)
561 except errors.HypervisorError, err:
562 logger.Error("Failed to start instance: %s" % err)
568 def ShutdownInstance(instance):
569 """Shut an instance down.
572 instance - name of instance to shutdown.
575 running_instances = GetInstanceList()
577 if instance.name not in running_instances:
580 hyper = hypervisor.GetHypervisor()
582 hyper.StopInstance(instance)
583 except errors.HypervisorError, err:
584 logger.Error("Failed to stop instance: %s" % err)
587 # test every 10secs for 2min
591 for dummy in range(11):
592 if instance.name not in GetInstanceList():
596 # the shutdown did not succeed
597 logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
600 hyper.StopInstance(instance, force=True)
601 except errors.HypervisorError, err:
602 logger.Error("Failed to stop instance: %s" % err)
606 if instance.name in GetInstanceList():
607 logger.Error("could not shutdown instance '%s' even by destroy")
613 def RebootInstance(instance, reboot_type, extra_args):
614 """Reboot an instance.
617 instance - name of instance to reboot
618 reboot_type - how to reboot [soft,hard,full]
621 running_instances = GetInstanceList()
623 if instance.name not in running_instances:
624 logger.Error("Cannot reboot instance that is not running")
627 hyper = hypervisor.GetHypervisor()
628 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
630 hyper.RebootInstance(instance)
631 except errors.HypervisorError, err:
632 logger.Error("Failed to soft reboot instance: %s" % err)
634 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
636 ShutdownInstance(instance)
637 StartInstance(instance, extra_args)
638 except errors.HypervisorError, err:
639 logger.Error("Failed to hard reboot instance: %s" % err)
642 raise errors.ParameterError("reboot_type invalid")
648 def CreateBlockDevice(disk, size, owner, on_primary, info):
649 """Creates a block device for an instance.
652 disk: a ganeti.objects.Disk object
653 size: the size of the physical underlying device
654 owner: a string with the name of the instance
655 on_primary: a boolean indicating if it is the primary node or not
656 info: string that will be sent to the physical device creation
659 the new unique_id of the device (this can sometime be
660 computed only after creation), or None. On secondary nodes,
661 it's not required to return anything.
666 for child in disk.children:
667 crdev = _RecursiveAssembleBD(child, owner, on_primary)
668 if on_primary or disk.AssembleOnSecondary():
669 # we need the children open in case the device itself has to
674 device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
675 if device is not None:
676 logger.Info("removing existing device %s" % disk)
678 except errors.BlockDeviceError, err:
681 device = bdev.Create(disk.dev_type, disk.physical_id,
684 raise ValueError("Can't create child device for %s, %s" %
686 if on_primary or disk.AssembleOnSecondary():
687 if not device.Assemble():
688 errorstring = "Can't assemble device after creation"
689 logger.Error(errorstring)
690 raise errors.BlockDeviceError("%s, very unusual event - check the node"
691 " daemon logs" % errorstring)
692 device.SetSyncSpeed(constants.SYNC_SPEED)
693 if on_primary or disk.OpenOnSecondary():
694 device.Open(force=True)
695 DevCacheManager.UpdateCache(device.dev_path, owner,
696 on_primary, disk.iv_name)
700 physical_id = device.unique_id
704 def RemoveBlockDevice(disk):
705 """Remove a block device.
707 This is intended to be called recursively.
711 # since we are removing the device, allow a partial match
712 # this allows removal of broken mirrors
713 rdev = _RecursiveFindBD(disk, allow_partial=True)
714 except errors.BlockDeviceError, err:
715 # probably can't attach
716 logger.Info("Can't attach to device %s in remove" % disk)
719 r_path = rdev.dev_path
720 result = rdev.Remove()
722 DevCacheManager.RemoveCache(r_path)
726 for child in disk.children:
727 result = result and RemoveBlockDevice(child)
731 def _RecursiveAssembleBD(disk, owner, as_primary):
732 """Activate a block device for an instance.
734 This is run on the primary and secondary nodes for an instance.
736 This function is called recursively.
739 disk: a objects.Disk object
740 as_primary: if we should make the block device read/write
743 the assembled device or None (in case no device was assembled)
745 If the assembly is not successful, an exception is raised.
750 mcn = disk.ChildrenNeeded()
752 mcn = 0 # max number of Nones allowed
754 mcn = len(disk.children) - mcn # max number of Nones
755 for chld_disk in disk.children:
757 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
758 except errors.BlockDeviceError, err:
759 if children.count(None) >= mcn:
762 logger.Debug("Error in child activation: %s" % str(err))
763 children.append(cdev)
765 if as_primary or disk.AssembleOnSecondary():
766 r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
767 r_dev.SetSyncSpeed(constants.SYNC_SPEED)
769 if as_primary or disk.OpenOnSecondary():
771 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
772 as_primary, disk.iv_name)
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  result = _RecursiveAssembleBD(disk, owner, as_primary)
  if isinstance(result, bdev.BlockDev):
    result = result.dev_path
  return result
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  Returns:
    True if the device (and all its children) could be shut down.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
    if result:
      DevCacheManager.RemoveCache(r_path)
  else:
    result = True
  if disk.children:
    for child in disk.children:
      result = result and ShutdownBlockDevice(child)
  return result
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  Args:
    parent_cdev: the disk whose mirror is extended
    new_cdevs: list of disks to add as children

  Returns:
    True on success, False if any device can't be found.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
    return False
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if new_bdevs.count(None) > 0:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  Args:
    parent_cdev: the disk whose mirror is shrunk
    new_cdevs: list of disks to remove from the mirror

  Returns:
    True on success, False if a device can't be found.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
    return False
  devs = []
  for disk in new_cdevs:
    rpath = disk.StaticDevPath()
    if rpath is None:
      # dynamic device path: must resolve via find
      bd = _RecursiveFindBD(disk)
      if bd is None:
        logger.Error("Can't find dynamic device %s while removing children" %
                     disk)
        return False
      else:
        devs.append(bd.dev_path)
    else:
      devs.append(rpath)
  parent_bdev.RemoveChildren(devs)
  return True
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  """
  stats = []
  for dsk in disks:
    rbd = _RecursiveFindBD(dsk)
    if rbd is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
    stats.append(rbd.CombinedSyncStatus())
  return stats
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance

  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  rbd = _RecursiveFindBD(disk)
  if rbd is None:
    return rbd
  return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Returns:
    True on success, False if the filename is not absolute or not an
    allowed upload target.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # NOTE(review): lines were dropped from the mangled source here; the
  # original allowed_files list may contain additional entries — verify
  allowed_files = [
    constants.CLUSTER_CONF_FILE,
    constants.SSH_KNOWN_HOSTS_FILE,
    ]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
                  atime=atime, mtime=mtime)
  return True
952 def _ErrnoOrStr(err):
953 """Format an EnvironmentError exception.
955 If the `err` argument has an errno attribute, it will be looked up
956 and converted into a textual EXXXX description. Otherwise the string
957 representation of the error will be returned.
960 if hasattr(err, 'errno'):
961 detail = errno.errorcode[err.errno]
967 def _OSOndiskVersion(name, os_dir):
968 """Compute and return the API version of a given OS.
970 This function will try to read the API version of the os given by
971 the 'name' parameter and residing in the 'os_dir' directory.
973 Return value will be either an integer denoting the version or None in the
974 case when this is not a valid OS name.
977 api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
980 st = os.stat(api_file)
981 except EnvironmentError, err:
982 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
983 " found (%s)" % _ErrnoOrStr(err))
985 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
986 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
992 api_version = f.read(256)
995 except EnvironmentError, err:
996 raise errors.InvalidOS(name, os_dir, "error while reading the"
997 " API version (%s)" % _ErrnoOrStr(err))
999 api_version = api_version.strip()
1001 api_version = int(api_version)
1002 except (TypeError, ValueError), err:
1003 raise errors.InvalidOS(name, os_dir,
1004 "API version is not integer (%s)" % str(err))
1009 def DiagnoseOS(top_dirs=None):
1010 """Compute the validity for all OSes.
1012 Returns an OS object for each name in all the given top directories
1013 (if not given defaults to constants.OS_SEARCH_PATH)
1019 if top_dirs is None:
1020 top_dirs = constants.OS_SEARCH_PATH
1023 for dir_name in top_dirs:
1024 if os.path.isdir(dir_name):
1026 f_names = utils.ListVisibleFiles(dir_name)
1027 except EnvironmentError, err:
1028 logger.Error("Can't list the OS directory %s: %s" %
1029 (dir_name, str(err)))
1031 for name in f_names:
1033 os_inst = OSFromDisk(name, base_dir=dir_name)
1034 result.append(os_inst)
1035 except errors.InvalidOS, err:
1036 result.append(objects.OS.FromInvalidOS(err))
1041 def OSFromDisk(name, base_dir=None):
1042 """Create an OS instance from disk.
1044 This function will return an OS instance if the given name is a
1045 valid OS name. Otherwise, it will raise an appropriate
1046 `errors.InvalidOS` exception, detailing why this is not a valid
1050 os_dir: Directory containing the OS scripts. Defaults to a search
1051 in all the OS_SEARCH_PATH directories.
1055 if base_dir is None:
1056 os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
1058 raise errors.InvalidOS(name, None, "OS dir not found in search path")
1060 os_dir = os.path.sep.join([base_dir, name])
1062 api_version = _OSOndiskVersion(name, os_dir)
1064 if api_version != constants.OS_API_VERSION:
1065 raise errors.InvalidOS(name, os_dir, "API version mismatch"
1066 " (found %s want %s)"
1067 % (api_version, constants.OS_API_VERSION))
1069 # OS Scripts dictionary, we will populate it with the actual script names
1070 os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1072 for script in os_scripts:
1073 os_scripts[script] = os.path.sep.join([os_dir, script])
1076 st = os.stat(os_scripts[script])
1077 except EnvironmentError, err:
1078 raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1079 (script, _ErrnoOrStr(err)))
1081 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1082 raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1085 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1086 raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1090 return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1091 create_script=os_scripts['create'],
1092 export_script=os_scripts['export'],
1093 import_script=os_scripts['import'],
1094 rename_script=os_scripts['rename'],
1095 api_version=api_version)
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted.

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    else:
      # more than one child, choose one that matches
      for child in disk.children:
        if child.size == disk.size:
          # return implies breaking the loop
          return SnapshotBlockDevice(child)
  elif disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      return None
  else:
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))
1134 def ExportSnapshot(disk, dest_node, instance):
1135 """Export a block device snapshot to a remote node.
1138 disk: the snapshot block device
1139 dest_node: the node to send the image to
1140 instance: instance being exported
1143 True if successful, False otherwise.
1146 inst_os = OSFromDisk(instance.os)
1147 export_script = inst_os.export_script
1149 logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1150 instance.name, int(time.time()))
1151 if not os.path.exists(constants.LOG_OS_DIR):
1152 os.mkdir(constants.LOG_OS_DIR, 0750)
1154 real_os_dev = _RecursiveFindBD(disk)
1155 if real_os_dev is None:
1156 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1160 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1161 destfile = disk.physical_id[1]
1163 # the target command is built out of three individual commands,
1164 # which are joined by pipes; we check each individual command for
1167 expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1168 export_script, instance.name,
1169 real_os_dev.dev_path, logfile)
1173 destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1174 destdir, destdir, destfile)
1175 remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,
1178 # all commands have been checked, so we're safe to combine them
1179 command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1181 result = utils.RunCmd(command)
1184 logger.Error("os snapshot export command '%s' returned error: %s"
1186 (command, result.fail_reason, result.output))
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  nic_count = 0
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count,
               '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE(review): nic_count here is the last index, not the count — this
  # matches the original writer/reader pair, so it is kept as-is
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  disk_count = 0
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    the required sections are missing.

  """
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  config.read(cff)

  if (not config.has_section(constants.INISECT_EXP) or
      not config.has_section(constants.INISECT_INS)):
    return None

  return config
1275 def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1276 """Import an os image into an instance.
1279 instance: the instance object
1280 os_disk: the instance-visible name of the os device
1281 swap_disk: the instance-visible name of the swap device
1282 src_node: node holding the source image
1283 src_image: path to the source image on src_node
1286 False in case of error, True otherwise.
1289 inst_os = OSFromDisk(instance.os)
1290 import_script = inst_os.import_script
1292 os_device = instance.FindDisk(os_disk)
1293 if os_device is None:
1294 logger.Error("Can't find this device-visible name '%s'" % os_disk)
1297 swap_device = instance.FindDisk(swap_disk)
1298 if swap_device is None:
1299 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1302 real_os_dev = _RecursiveFindBD(os_device)
1303 if real_os_dev is None:
1304 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1308 real_swap_dev = _RecursiveFindBD(swap_device)
1309 if real_swap_dev is None:
1310 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1312 real_swap_dev.Open()
1314 logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1315 instance.name, int(time.time()))
1316 if not os.path.exists(constants.LOG_OS_DIR):
1317 os.mkdir(constants.LOG_OS_DIR, 0750)
1319 destcmd = utils.BuildShellCmd('cat %s', src_image)
1320 remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,
1324 impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1325 inst_os.path, import_script, instance.name,
1326 real_os_dev.dev_path, real_swap_dev.dev_path,
1329 command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1331 result = utils.RunCmd(command)
1334 logger.Error("os import command '%s' returned error: %s"
1336 (command, result.fail_reason, result.output))
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns:
    list with the visible entries of constants.EXPORT_DIR, or an empty
    list when the export directory does not exist.

  """
  if not os.path.isdir(constants.EXPORT_DIR):
    return []
  return utils.ListVisibleFiles(constants.EXPORT_DIR)
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  target = os.path.join(constants.EXPORT_DIR, export)

  shutil.rmtree(target)
  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.

  return True
1371 def RenameBlockDevices(devlist):
1372 """Rename a list of block devices.
1374 The devlist argument is a list of tuples (disk, new_logical,
1375 new_physical). The return value will be a combined boolean result
1376 (True only if all renames succeeded).
1380 for disk, unique_id in devlist:
1381 dev = _RecursiveFindBD(disk)
1386 old_rpath = dev.dev_path
1387 dev.Rename(unique_id)
1388 new_rpath = dev.dev_path
1389 if old_rpath != new_rpath:
1390 DevCacheManager.RemoveCache(old_rpath)
1391 # FIXME: we should add the new cache information here, like:
1392 # DevCacheManager.UpdateCache(new_rpath, owner, ...)
1393 # but we don't have the owner here - maybe parse from existing
1394 # cache? for now, we only lose lvm data when we rename, which
1395 # is less critical than DRBD or MD
1396 except errors.BlockDeviceError, err:
1397 logger.Error("Can't rename device '%s' to '%s': %s" %
1398 (dev, unique_id, err))
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = ssconf.SimpleStore().GetFileStorageDir()
  # BUG FIX: os.path.commonprefix compares character-wise, not
  # component-wise, so e.g. "/srv/ganeti-evil" used to pass a check
  # against base "/srv/ganeti"; compare whole path components instead
  if (file_storage_dir != base_file_storage_dir and
      not file_storage_dir.startswith(base_file_storage_dir + os.sep)):
    logger.Error("file storage directory '%s' is not under base file"
                 " storage directory '%s'" %
                 (file_storage_dir, base_file_storage_dir))
    return None
  return file_storage_dir
1428 def CreateFileStorageDir(file_storage_dir):
1429 """Create file storage directory.
1432 file_storage_dir: string containing the path
1435 tuple with first element a boolean indicating wheter dir
1436 creation was successful or not
1439 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1441 if not file_storage_dir:
1444 if os.path.exists(file_storage_dir):
1445 if not os.path.isdir(file_storage_dir):
1446 logger.Error("'%s' is not a directory" % file_storage_dir)
1450 os.makedirs(file_storage_dir, 0750)
1451 except OSError, err:
1452 logger.Error("Cannot create file storage directory '%s': %s" %
1453 (file_storage_dir, err))
1458 def RemoveFileStorageDir(file_storage_dir):
1459 """Remove file storage directory.
1461 Remove it only if it's empty. If not log an error and return.
1464 file_storage_dir: string containing the path
1467 tuple with first element a boolean indicating wheter dir
1468 removal was successful or not
1471 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1473 if not file_storage_dir:
1476 if os.path.exists(file_storage_dir):
1477 if not os.path.isdir(file_storage_dir):
1478 logger.Error("'%s' is not a directory" % file_storage_dir)
1480 # deletes dir only if empty, otherwise we want to return False
1482 os.rmdir(file_storage_dir)
1483 except OSError, err:
1484 logger.Error("Cannot remove file storage directory '%s': %s" %
1485 (file_storage_dir, err))
1490 def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
1491 """Rename the file storage directory.
1494 old_file_storage_dir: string containing the old path
1495 new_file_storage_dir: string containing the new path
1498 tuple with first element a boolean indicating wheter dir
1499 rename was successful or not
1502 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
1503 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
1505 if not old_file_storage_dir or not new_file_storage_dir:
1508 if not os.path.exists(new_file_storage_dir):
1509 if os.path.isdir(old_file_storage_dir):
1511 os.rename(old_file_storage_dir, new_file_storage_dir)
1512 except OSError, err:
1513 logger.Error("Cannot rename '%s' to '%s': %s"
1514 % (old_file_storage_dir, new_file_storage_dir, err))
1517 logger.Error("'%s' is not a directory" % old_file_storage_dir)
1520 if os.path.exists(old_file_storage_dir):
1521 logger.Error("Cannot rename '%s' to '%s'. Both locations exist." %
1522 old_file_storage_dir, new_file_storage_dir)
1527 class HooksRunner(object):
1530 This class is instantiated on the node side (ganeti-noded) and not on
1534 RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1536 def __init__(self, hooks_base_dir=None):
1537 """Constructor for hooks runner.
1540 - hooks_base_dir: if not None, this overrides the
1541 constants.HOOKS_BASE_DIR (useful for unittests)
1544 if hooks_base_dir is None:
1545 hooks_base_dir = constants.HOOKS_BASE_DIR
1546 self._BASE_DIR = hooks_base_dir
1549 def ExecHook(script, env):
1550 """Exec one hook script.
1553 - script: the full path to the script
1554 - env: the environment with which to exec the script
1557 # exec the process using subprocess and log the output
1560 fdstdin = open("/dev/null", "r")
1561 child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1562 stderr=subprocess.STDOUT, close_fds=True,
1563 shell=False, cwd="/", env=env)
1566 output = child.stdout.read(4096)
1567 child.stdout.close()
1568 except EnvironmentError, err:
1569 output += "Hook script error: %s" % str(err)
1573 result = child.wait()
1575 except EnvironmentError, err:
1576 if err.errno == errno.EINTR:
1580 # try not to leak fds
1581 for fd in (fdstdin, ):
1585 except EnvironmentError, err:
1586 # just log the error
1587 #logger.Error("While closing fd %s: %s" % (fd, err))
1590 return result == 0, output
1592 def RunHooks(self, hpath, phase, env):
1593 """Run the scripts in the hooks directory.
1595 This method will not be usually overriden by child opcodes.
1598 if phase == constants.HOOKS_PHASE_PRE:
1600 elif phase == constants.HOOKS_PHASE_POST:
1603 raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
1606 subdir = "%s-%s.d" % (hpath, suffix)
1607 dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1609 dir_contents = utils.ListVisibleFiles(dir_name)
1610 except OSError, err:
1614 # we use the standard python sort order,
1615 # so 00name is the recommended naming scheme
1617 for relname in dir_contents:
1618 fname = os.path.join(dir_name, relname)
1619 if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1620 self.RE_MASK.match(relname) is not None):
1621 rrval = constants.HKR_SKIP
1624 result, output = self.ExecHook(fname, env)
1626 rrval = constants.HKR_FAIL
1628 rrval = constants.HKR_SUCCESS
1629 rr.append(("%s/%s" % (subdir, relname), rrval, output))
class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    The input data is written to a temporary file which is passed to
    the script as its only argument; the file is always removed
    afterwards.

    Return value: tuple of:
       - run status (one of the IARUN_ constants)
       - stdout
       - stderr
       - fail reason (as from utils.RunResult)

    """
    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      os.write(fd, idata)
      os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name])
      if result.failed:
        return (constants.IARUN_FAILURE, result.stdout, result.stderr,
                result.fail_reason)
    finally:
      # the input file is removed whatever the outcome
      os.unlink(fin_name)

    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
1670 class DevCacheManager(object):
1671 """Simple class for managing a cache of block device information.
1674 _DEV_PREFIX = "/dev/"
1675 _ROOT_DIR = constants.BDEV_CACHE_DIR
1678 def _ConvertPath(cls, dev_path):
1679 """Converts a /dev/name path to the cache file name.
1681 This replaces slashes with underscores and strips the /dev
1682 prefix. It then returns the full path to the cache file
1685 if dev_path.startswith(cls._DEV_PREFIX):
1686 dev_path = dev_path[len(cls._DEV_PREFIX):]
1687 dev_path = dev_path.replace("/", "_")
1688 fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1692 def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1693 """Updates the cache information for a given device.
1696 if dev_path is None:
1697 logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1699 fpath = cls._ConvertPath(dev_path)
1705 iv_name = "not_visible"
1706 fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1708 utils.WriteFile(fpath, data=fdata)
1709 except EnvironmentError, err:
1710 logger.Error("Can't update bdev cache for %s, error %s" %
1711 (dev_path, str(err)))
1714 def RemoveCache(cls, dev_path):
1715 """Remove data for a dev_path.
1718 if dev_path is None:
1719 logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1721 fpath = cls._ConvertPath(dev_path)
1723 utils.RemoveFile(fpath)
1724 except EnvironmentError, err:
1725 logger.Error("Can't update bdev cache for %s, error %s" %
1726 (dev_path, str(err)))