4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
34 from ganeti import logger
35 from ganeti import errors
36 from ganeti import utils
37 from ganeti import ssh
38 from ganeti import hypervisor
39 from ganeti import constants
40 from ganeti import bdev
41 from ganeti import objects
42 from ganeti import ssconf
# NOTE(review): enclosing `def` is elided from this numbered listing; the call
# sites below use it as _GetSshRunner(). Returns a fresh ssh.SshRunner.
46 return ssh.SshRunner()
# NOTE(review): `def` line and the failure-branch condition are elided from
# this listing; visible code runs MASTER_SCRIPT with "start" and logs errors.
50 """Activate local node as master node.
52 There are two needed steps for this:
53 - run the master script
54 - register the cron script
57 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
60 logger.Error("could not activate cluster interface with command %s,"
61 " error: '%s'" % (result.cmd, result.output))
# NOTE(review): `def` line and failure test elided; mirror of the start path —
# runs MASTER_SCRIPT with "stop" and logs the command output on error.
68 """Deactivate this node as master.
70 This runs the master stop script.
73 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
76 logger.Error("could not deactivate cluster interface with command %s,"
77 " error: '%s'" % (result.cmd, result.output))
# NOTE(review): intermediate lines (try:, return statements) are elided from
# this listing; code kept byte-identical.
83 def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
84 """Joins this node to the cluster.
86 This does the following:
87 - updates the hostkeys of the machine (rsa and dsa)
88 - adds the ssh private key to the user
89 - adds the ssh public key to the users' authorized_keys file
# Host keys: private keys mode 0600, public keys world-readable 0644.
92 sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
93 (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
94 (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
95 (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
96 for name, content, mode in sshd_keys:
97 utils.WriteFile(name, data=content, mode=mode)
# User SSH files for the ganeti run-as account (the `try:` opener is elided).
100 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
102 except errors.OpExecError, err:
103 logger.Error("Error while processing user ssh files: %s" % err)
106 for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
107 utils.WriteFile(name, data=content, mode=0600)
109 utils.AddAuthorizedKey(auth_keys, sshpub)
# Restart sshd so the new host keys take effect.
111 utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
# NOTE(review): `def` line and several statements elided. Removes this node's
# data files and revokes the cluster's SSH access to the run-as user.
117 """Cleans up the current node and prepares it to be removed from the cluster.
# Remove regular (non-symlink) files from the ganeti data directory.
120 if os.path.isdir(constants.DATA_DIR):
121 for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
122 full_name = os.path.join(constants.DATA_DIR, rel_name)
123 if os.path.isfile(full_name) and not os.path.islink(full_name):
124 utils.RemoveFile(full_name)
127 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
128 except errors.OpExecError, err:
129 logger.Error("Error while processing ssh files: %s" % err)
# Remove our own public key from authorized_keys, then the key pair itself.
132 f = open(pub_key, 'r')
134 utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
138 utils.RemoveFile(priv_key)
139 utils.RemoveFile(pub_key)
# NOTE(review): initialization of `outputarray` and the return are elided from
# this listing; code kept byte-identical.
142 def GetNodeInfo(vgname):
143 """Gives back a hash with different informations about the node.
146 { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx,
147 'memory_free' : xxx, 'memory_total' : xxx }
149 vg_size is the size of the configured volume group in MiB
150 vg_free is the free size of the volume group in MiB
151 memory_dom0 is the memory allocated for domain0 in MiB
152 memory_free is the currently available (free) ram in MiB
153 memory_total is the total number of ram in MiB
157 vginfo = _GetVGInfo(vgname)
158 outputarray['vg_size'] = vginfo['vg_size']
159 outputarray['vg_free'] = vginfo['vg_free']
# Hypervisor-specific info (memory figures) is merged in when available.
161 hyper = hypervisor.GetHypervisor()
162 hyp_info = hyper.GetNodeInfo()
163 if hyp_info is not None:
164 outputarray.update(hyp_info)
# boot_id changes on every boot; used elsewhere to detect node restarts
# (presumably — TODO confirm against callers).
166 f = open("/proc/sys/kernel/random/boot_id", 'r')
168 outputarray["bootid"] = f.read(128).rstrip("\n")
# NOTE(review): `result` initialization, the nodelist success branch and the
# return are elided from this listing.
175 def VerifyNode(what):
176 """Verify the status of the local node.
179 what - a dictionary of things to check:
180 'filelist' : list of files for which to compute checksums
181 'nodelist' : list of nodes we should check communication with
182 'hypervisor': run the hypervisor-specific verify
184 Requested files on local node are checksummed and the result returned.
186 The nodelist is traversed, with the following checks being made
188 - known_hosts key correct
189 - correct resolving of node name (target node returns its own hostname
190 by ssh-execution of 'hostname', result compared against name in list.
195 if 'hypervisor' in what:
196 result['hypervisor'] = hypervisor.GetHypervisor().Verify()
198 if 'filelist' in what:
199 result['filelist'] = utils.FingerprintFiles(what['filelist'])
201 if 'nodelist' in what:
202 result['nodelist'] = {}
203 for node in what['nodelist']:
204 success, message = _GetSshRunner().VerifyNodeHostname(node)
206 result['nodelist'][node] = message
# NOTE(review): `sep` definition, failure handling and the return of `lvs`
# are elided from this listing.
210 def GetVolumeList(vg_name):
211 """Compute list of logical volumes and their size.
214 dictionary of all partions (key) with their size (in MiB), inactive
216 {'test1': ('20.06', True, True)}
# Machine-readable lvs output: no header, MiB units without suffix, custom
# field separator.
221 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
222 "--separator=%s" % sep,
223 "-olv_name,lv_size,lv_attr", vg_name])
225 logger.Error("Failed to list logical volumes, lvs output: %s" %
229 for line in result.stdout.splitlines():
230 line = line.strip().rstrip(sep)
231 name, size, attr = line.split(sep)
# lv_attr positions: index 4 is the state flag, index 5 the open flag
# (per lvs attribute-field layout).
234 inactive = attr[4] == '-'
235 online = attr[5] == 'o'
236 lvs[name] = (size, inactive, online)
241 def ListVolumeGroups():
242 """List the volume groups and their size.
245 Dictionary with keys volume name and values the size of the volume
# Thin wrapper: delegates entirely to the utils helper.
248 return utils.ListVolumeGroups()
# NOTE(review): `def` line, the --separator argument and the map_line/parse_dev
# definitions' headers are elided from this listing.
252 """List all volumes on this node.
255 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
257 "--options=lv_name,lv_size,devices,vg_name"])
259 logger.Error("Failed to list logical volumes, lvs output: %s" %
# Strip the "(extent)" suffix lvs appends to the device path.
265 return dev.split('(')[0]
271 'name': line[0].strip(),
272 'size': line[1].strip(),
273 'dev': parse_dev(line[2].strip()),
274 'vg': line[3].strip(),
277 return [map_line(line.split('|')) for line in result.stdout.splitlines()]
# NOTE(review): the `return False` / `return True` lines are elided from
# this listing.
280 def BridgesExist(bridges_list):
281 """Check if a list of bridges exist on the current node.
284 True if all of them exist, false otherwise
287 for bridge in bridges_list:
288 if not utils.BridgeExists(bridge):
# NOTE(review): the `try:` opener and return are elided from this listing.
294 def GetInstanceList():
295 """Provides a list of instances.
298 A list of all running instances on the current node
299 - instance1.example.com
300 - instance2.example.com
304 names = hypervisor.GetHypervisor().ListInstances()
305 except errors.HypervisorError, err:
306 logger.Error("error enumerating instances: %s" % str(err))
# NOTE(review): `output` initialization and the return are elided from this
# listing.
312 def GetInstanceInfo(instance):
313 """Gives back the informations about an instance as a dictionary.
316 instance: name of the instance (ex. instance1.example.com)
319 { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
321 memory: memory size of instance (int)
322 state: xen state of instance (string)
323 time: cpu time of instance (float)
# iinfo is a hypervisor tuple; indices 2/4/5 hold memory, state, cpu time
# (per the docstring above — field layout itself not visible here).
328 iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
329 if iinfo is not None:
330 output['memory'] = iinfo[2]
331 output['state'] = iinfo[4]
332 output['time'] = iinfo[5]
# NOTE(review): the per-instance dict construction and return are elided from
# this listing.
337 def GetAllInstancesInfo():
338 """Gather data about all instances.
340 This is the equivalent of `GetInstanceInfo()`, except that it
341 computes data for all instances at once, thus being faster if one
342 needs data about more than one instance.
344 Returns: a dictionary of dictionaries, keys being the instance name,
346 { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
348 memory: memory size of instance (int)
349 state: xen state of instance (string)
350 time: cpu time of instance (float)
351 vcpus: the number of cpus
356 iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
358 for name, inst_id, memory, vcpus, state, times in iinfo:
# NOTE(review): return statements, Open() calls and some format arguments are
# elided from this listing.
369 def AddOSToInstance(instance, os_disk, swap_disk):
370 """Add an OS to an instance.
373 instance: the instance object
374 os_disk: the instance-visible name of the os device
375 swap_disk: the instance-visible name of the swap device
378 inst_os = OSFromDisk(instance.os)
380 create_script = inst_os.create_script
# Map the instance-visible disk names to their config objects.
382 os_device = instance.FindDisk(os_disk)
383 if os_device is None:
384 logger.Error("Can't find this device-visible name '%s'" % os_disk)
387 swap_device = instance.FindDisk(swap_disk)
388 if swap_device is None:
389 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
# Resolve the config objects to actual assembled block devices.
392 real_os_dev = _RecursiveFindBD(os_device)
393 if real_os_dev is None:
394 raise errors.BlockDeviceError("Block device '%s' is not set up" %
398 real_swap_dev = _RecursiveFindBD(swap_device)
399 if real_swap_dev is None:
400 raise errors.BlockDeviceError("Block device '%s' is not set up" %
# Per-run log file under LOG_OS_DIR, named after OS/instance/timestamp.
404 logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
405 instance.name, int(time.time()))
406 if not os.path.exists(constants.LOG_OS_DIR):
407 os.mkdir(constants.LOG_OS_DIR, 0750)
409 command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
410 inst_os.path, create_script, instance.name,
411 real_os_dev.dev_path, real_swap_dev.dev_path,
414 result = utils.RunCmd(command)
416 logger.Error("os create command '%s' returned error: %s, logfile: %s,"
418 (command, result.fail_reason, logfile, result.output))
# NOTE(review): return statements and some lines elided from this listing.
# Structure parallels AddOSToInstance but invokes the OS rename script.
424 def RunRenameInstance(instance, old_name, os_disk, swap_disk):
425 """Run the OS rename script for an instance.
428 instance: the instance object
429 old_name: the old name of the instance
430 os_disk: the instance-visible name of the os device
431 swap_disk: the instance-visible name of the swap device
434 inst_os = OSFromDisk(instance.os)
436 script = inst_os.rename_script
438 os_device = instance.FindDisk(os_disk)
439 if os_device is None:
440 logger.Error("Can't find this device-visible name '%s'" % os_disk)
443 swap_device = instance.FindDisk(swap_disk)
444 if swap_device is None:
445 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
448 real_os_dev = _RecursiveFindBD(os_device)
449 if real_os_dev is None:
450 raise errors.BlockDeviceError("Block device '%s' is not set up" %
454 real_swap_dev = _RecursiveFindBD(swap_device)
455 if real_swap_dev is None:
456 raise errors.BlockDeviceError("Block device '%s' is not set up" %
460 logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
462 instance.name, int(time.time()))
463 if not os.path.exists(constants.LOG_OS_DIR):
464 os.mkdir(constants.LOG_OS_DIR, 0750)
466 command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
467 inst_os.path, script, old_name, instance.name,
468 real_os_dev.dev_path, real_swap_dev.dev_path,
471 result = utils.RunCmd(command)
# NOTE(review): message says "os create" but this is the rename path —
# likely a copy/paste slip in the original; not changed here.
474 logger.Error("os create command '%s' returned error: %s"
476 (command, result.fail_reason, result.output))
# NOTE(review): failure branches, the field-count check and return are elided
# from this listing.
482 def _GetVGInfo(vg_name):
483 """Get informations about the volume group.
486 vg_name: the volume group
489 { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
491 vg_size is the total size of the volume group in MiB
492 vg_free is the free size of the volume group in MiB
493 pv_count are the number of physical disks in that vg
495 If an error occurs during gathering of data, we return the same dict
496 with keys all set to None.
# Start with all keys mapped to None so error paths return the documented
# shape.
499 retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
501 retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
502 "--nosuffix", "--units=m", "--separator=:", vg_name])
505 errmsg = "volume group %s not present" % vg_name
508 valarr = retval.stdout.strip().rstrip(':').split(':')
# Sizes are reported as floats in MiB; round to whole MiB integers.
512 "vg_size": int(round(float(valarr[0]), 0)),
513 "vg_free": int(round(float(valarr[1]), 0)),
514 "pv_count": int(valarr[2]),
516 except ValueError, err:
517 logger.Error("Fail to parse vgs output: %s" % str(err))
519 logger.Error("vgs output has the wrong number of fields (expected"
520 " three): %s" % str(valarr))
# NOTE(review): `block_devices` initialization, the None check and return are
# elided from this listing.
524 def _GatherBlockDevs(instance):
525 """Set up an instance's block device(s).
527 This is run on the primary node at instance startup. The block
528 devices must be already assembled.
532 for disk in instance.disks:
533 device = _RecursiveFindBD(disk)
535 raise errors.BlockDeviceError("Block device '%s' is not set up." %
538 block_devices.append((disk, device))
# NOTE(review): the already-running early return, `try:` opener and final
# return are elided from this listing.
542 def StartInstance(instance, extra_args):
543 """Start an instance.
546 instance - name of instance to start.
549 running_instances = GetInstanceList()
# Idempotence guard: nothing to do if the instance is already running.
551 if instance.name in running_instances:
554 block_devices = _GatherBlockDevs(instance)
555 hyper = hypervisor.GetHypervisor()
558 hyper.StartInstance(instance, block_devices, extra_args)
559 except errors.HypervisorError, err:
560 logger.Error("Failed to start instance: %s" % err)
# NOTE(review): early returns, the wait `time.sleep` and `try:` openers are
# elided from this listing.
566 def ShutdownInstance(instance):
567 """Shut an instance down.
570 instance - name of instance to shutdown.
573 running_instances = GetInstanceList()
575 if instance.name not in running_instances:
578 hyper = hypervisor.GetHypervisor()
580 hyper.StopInstance(instance)
581 except errors.HypervisorError, err:
582 logger.Error("Failed to stop instance: %s" % err)
585 # test every 10secs for 2min
# Poll for graceful shutdown before escalating to a forced stop.
589 for dummy in range(11):
590 if instance.name not in GetInstanceList():
594 # the shutdown did not succeed
595 logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
598 hyper.StopInstance(instance, force=True)
599 except errors.HypervisorError, err:
600 logger.Error("Failed to stop instance: %s" % err)
# NOTE(review): the log format string below has a '%s' but no argument
# applied on this line — likely a latent bug in the original; not changed.
604 if instance.name in GetInstanceList():
605 logger.Error("could not shutdown instance '%s' even by destroy")
# NOTE(review): return statements and `try:` openers are elided from this
# listing.
611 def RebootInstance(instance, reboot_type, extra_args):
612 """Reboot an instance.
615 instance - name of instance to reboot
616 reboot_type - how to reboot [soft,hard,full]
619 running_instances = GetInstanceList()
621 if instance.name not in running_instances:
622 logger.Error("Cannot reboot instance that is not running")
625 hyper = hypervisor.GetHypervisor()
# Soft: hypervisor-level reboot; hard: full stop + start cycle.
626 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
628 hyper.RebootInstance(instance)
629 except errors.HypervisorError, err:
630 logger.Error("Failed to soft reboot instance: %s" % err)
632 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
634 ShutdownInstance(instance)
635 StartInstance(instance, extra_args)
636 except errors.HypervisorError, err:
637 logger.Error("Failed to hard reboot instance: %s" % err)
640 raise errors.ParameterError("reboot_type invalid")
# NOTE(review): `clist` initialization, several `try:` openers, Remove() call
# and the return are elided from this listing.
646 def CreateBlockDevice(disk, size, owner, on_primary, info):
647 """Creates a block device for an instance.
650 disk: a ganeti.objects.Disk object
651 size: the size of the physical underlying device
652 owner: a string with the name of the instance
653 on_primary: a boolean indicating if it is the primary node or not
654 info: string that will be sent to the physical device creation
657 the new unique_id of the device (this can sometime be
658 computed only after creation), or None. On secondary nodes,
659 it's not required to return anything.
# Recursively assemble children first so the parent can be built on them.
664 for child in disk.children:
665 crdev = _RecursiveAssembleBD(child, owner, on_primary)
666 if on_primary or disk.AssembleOnSecondary():
667 # we need the children open in case the device itself has to
672 device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
673 if device is not None:
674 logger.Info("removing existing device %s" % disk)
676 except errors.BlockDeviceError, err:
679 device = bdev.Create(disk.dev_type, disk.physical_id,
682 raise ValueError("Can't create child device for %s, %s" %
684 if on_primary or disk.AssembleOnSecondary():
685 if not device.Assemble():
686 errorstring = "Can't assemble device after creation"
687 logger.Error(errorstring)
688 raise errors.BlockDeviceError("%s, very unusual event - check the node"
689 " daemon logs" % errorstring)
690 device.SetSyncSpeed(constants.SYNC_SPEED)
691 if on_primary or disk.OpenOnSecondary():
692 device.Open(force=True)
693 DevCacheManager.UpdateCache(device.dev_path, owner,
694 on_primary, disk.iv_name)
698 physical_id = device.unique_id
# NOTE(review): `try:` opener, the rdev None branch and return are elided from
# this listing.
702 def RemoveBlockDevice(disk):
703 """Remove a block device.
705 This is intended to be called recursively.
709 # since we are removing the device, allow a partial match
710 # this allows removal of broken mirrors
711 rdev = _RecursiveFindBD(disk, allow_partial=True)
712 except errors.BlockDeviceError, err:
713 # probably can't attach
714 logger.Info("Can't attach to device %s in remove" % disk)
717 r_path = rdev.dev_path
718 result = rdev.Remove()
# Invalidate the device cache entry once the device is gone.
720 DevCacheManager.RemoveCache(r_path)
# Removal succeeds only if all children are removed too (AND-fold).
724 for child in disk.children:
725 result = result and RemoveBlockDevice(child)
# NOTE(review): `children` initialization, the raise/re-raise in the child
# error branch, Open() call and return are elided from this listing.
729 def _RecursiveAssembleBD(disk, owner, as_primary):
730 """Activate a block device for an instance.
732 This is run on the primary and secondary nodes for an instance.
734 This function is called recursively.
737 disk: a objects.Disk object
738 as_primary: if we should make the block device read/write
741 the assembled device or None (in case no device was assembled)
743 If the assembly is not successful, an exception is raised.
# mcn = how many children may fail (be None) before assembly is aborted.
748 mcn = disk.ChildrenNeeded()
750 mcn = 0 # max number of Nones allowed
752 mcn = len(disk.children) - mcn # max number of Nones
753 for chld_disk in disk.children:
755 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
756 except errors.BlockDeviceError, err:
757 if children.count(None) >= mcn:
760 logger.Debug("Error in child activation: %s" % str(err))
761 children.append(cdev)
763 if as_primary or disk.AssembleOnSecondary():
764 r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
765 r_dev.SetSyncSpeed(constants.SYNC_SPEED)
767 if as_primary or disk.OpenOnSecondary():
769 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
770 as_primary, disk.iv_name)
# NOTE(review): the final return is elided from this listing.
777 def AssembleBlockDevice(disk, owner, as_primary):
778 """Activate a block device for an instance.
780 This is a wrapper over _RecursiveAssembleBD.
783 a /dev path for primary nodes
784 True for secondary nodes
787 result = _RecursiveAssembleBD(disk, owner, as_primary)
# Unwrap a BlockDev object into its /dev path for the caller.
788 if isinstance(result, bdev.BlockDev):
789 result = result.dev_path
# NOTE(review): the else branch (result = True when device not found) and
# return are elided from this listing.
793 def ShutdownBlockDevice(disk):
794 """Shut down a block device.
796 First, if the device is assembled (can `Attach()`), then the device
797 is shutdown. Then the children of the device are shutdown.
799 This function is called recursively. Note that we don't cache the
800 children or such, as oppossed to assemble, shutdown of different
801 devices doesn't require that the upper device was active.
804 r_dev = _RecursiveFindBD(disk)
805 if r_dev is not None:
806 r_path = r_dev.dev_path
807 result = r_dev.Shutdown()
809 DevCacheManager.RemoveCache(r_path)
# Parent first, then children; overall result is the AND of all shutdowns.
813 for child in disk.children:
814 result = result and ShutdownBlockDevice(child)
# NOTE(review): return statements are elided from this listing.
818 def MirrorAddChildren(parent_cdev, new_cdevs):
819 """Extend a mirrored block device.
# partial match allowed: the parent may be a degraded mirror being repaired.
822 parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
823 if parent_bdev is None:
824 logger.Error("Can't find parent device")
826 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
827 if new_bdevs.count(None) > 0:
828 logger.Error("Can't find new device(s) to add: %s:%s" %
829 (new_bdevs, new_cdevs))
831 parent_bdev.AddChildren(new_bdevs)
# NOTE(review): `devs` initialization, the static-path branch and returns are
# elided from this listing.
835 def MirrorRemoveChildren(parent_cdev, new_cdevs):
836 """Shrink a mirrored block device.
839 parent_bdev = _RecursiveFindBD(parent_cdev)
840 if parent_bdev is None:
841 logger.Error("Can't find parent in remove children: %s" % parent_cdev)
# Prefer a disk's static device path; fall back to dynamic lookup.
844 for disk in new_cdevs:
845 rpath = disk.StaticDevPath()
847 bd = _RecursiveFindBD(disk)
849 logger.Error("Can't find dynamic device %s while removing children" %
853 devs.append(bd.dev_path)
856 parent_bdev.RemoveChildren(devs)
# NOTE(review): `stats` initialization, the loop header and return are elided
# from this listing.
860 def GetMirrorStatus(disks):
861 """Get the mirroring status of a list of devices.
864 disks: list of `objects.Disk`
867 list of (mirror_done, estimated_time) tuples, which
868 are the result of bdev.BlockDevice.CombinedSyncStatus()
873 rbd = _RecursiveFindBD(dsk)
875 raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
876 stats.append(rbd.CombinedSyncStatus())
# NOTE(review): `children` initialization and the allow_partial handling lines
# are elided from this listing.
880 def _RecursiveFindBD(disk, allow_partial=False):
881 """Check if a device is activated.
883 If so, return informations about the real device.
886 disk: the objects.Disk instance
887 allow_partial: don't abort the find if a child of the
888 device can't be found; this is intended to be
889 used when repairing mirrors
892 None if the device can't be found
893 otherwise the device instance
# Resolve children first, then attach to the composite device.
898 for chdisk in disk.children:
899 children.append(_RecursiveFindBD(chdisk))
901 return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
# NOTE(review): the rbd-is-None early return is elided from this listing.
904 def FindBlockDevice(disk):
905 """Check if a device is activated.
907 If so, return informations about the real device.
910 disk: the objects.Disk instance
912 None if the device can't be found
913 (device_path, major, minor, sync_percent, estimated_time, is_degraded)
916 rbd = _RecursiveFindBD(disk)
919 return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
# NOTE(review): return statements and part of the allowed_files list are
# elided from this listing.
922 def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
923 """Write a file to the filesystem.
925 This allows the master to overwrite(!) a file. It will only perform
926 the operation if the file belongs to a list of configuration files.
929 if not os.path.isabs(file_name):
930 logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
# Whitelist check: only known config files plus the ssconf file list may be
# overwritten remotely.
935 constants.CLUSTER_CONF_FILE,
937 constants.SSH_KNOWN_HOSTS_FILE,
939 allowed_files.extend(ssconf.SimpleStore().GetFileList())
940 if file_name not in allowed_files:
941 logger.Error("Filename passed to UploadFile not in allowed"
942 " upload targets: '%s'" % file_name)
945 utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
946 atime=atime, mtime=mtime)
# NOTE(review): the else branch (str(err)) and return are elided from this
# listing.
950 def _ErrnoOrStr(err):
951 """Format an EnvironmentError exception.
953 If the `err` argument has an errno attribute, it will be looked up
954 and converted into a textual EXXXX description. Otherwise the string
955 representation of the error will be returned.
958 if hasattr(err, 'errno'):
959 detail = errno.errorcode[err.errno]
# NOTE(review): `try:` openers, the open()/close() around the read, and the
# return are elided from this listing.
965 def _OSOndiskVersion(name, os_dir):
966 """Compute and return the API version of a given OS.
968 This function will try to read the API version of the os given by
969 the 'name' parameter and residing in the 'os_dir' directory.
971 Return value will be either an integer denoting the version or None in the
972 case when this is not a valid OS name.
975 api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
978 st = os.stat(api_file)
979 except EnvironmentError, err:
980 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
981 " found (%s)" % _ErrnoOrStr(err))
# Must be a regular file, not a dir/symlink-to-oddity etc.
983 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
984 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
990 api_version = f.read(256)
993 except EnvironmentError, err:
994 raise errors.InvalidOS(name, os_dir, "error while reading the"
995 " API version (%s)" % _ErrnoOrStr(err))
997 api_version = api_version.strip()
999 api_version = int(api_version)
1000 except (TypeError, ValueError), err:
1001 raise errors.InvalidOS(name, os_dir,
1002 "API version is not integer (%s)" % str(err))
# NOTE(review): `result` initialization, a `continue` after the listing error,
# and the return are elided from this listing.
1007 def DiagnoseOS(top_dirs=None):
1008 """Compute the validity for all OSes.
1010 Returns an OS object for each name in all the given top directories
1011 (if not given defaults to constants.OS_SEARCH_PATH)
1017 if top_dirs is None:
1018 top_dirs = constants.OS_SEARCH_PATH
1021 for dir_name in top_dirs:
1022 if os.path.isdir(dir_name):
1024 f_names = utils.ListVisibleFiles(dir_name)
1025 except EnvironmentError, err:
1026 logger.Error("Can't list the OS directory %s: %s" %
1027 (dir_name, str(err)))
1029 for name in f_names:
1031 os_inst = OSFromDisk(name, base_dir=dir_name)
1032 result.append(os_inst)
# Invalid OSes are still reported, wrapped as invalid-OS objects.
1033 except errors.InvalidOS, err:
1034 result.append(objects.OS.FromInvalidOS(err))
# NOTE(review): several `try:` openers and format arguments are elided from
# this listing.
1039 def OSFromDisk(name, base_dir=None):
1040 """Create an OS instance from disk.
1042 This function will return an OS instance if the given name is a
1043 valid OS name. Otherwise, it will raise an appropriate
1044 `errors.InvalidOS` exception, detailing why this is not a valid
1048 os_dir: Directory containing the OS scripts. Defaults to a search
1049 in all the OS_SEARCH_PATH directories.
1053 if base_dir is None:
1054 os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
1056 raise errors.InvalidOS(name, None, "OS dir not found in search path")
1058 os_dir = os.path.sep.join([base_dir, name])
1060 api_version = _OSOndiskVersion(name, os_dir)
1062 if api_version != constants.OS_API_VERSION:
1063 raise errors.InvalidOS(name, os_dir, "API version mismatch"
1064 " (found %s want %s)"
1065 % (api_version, constants.OS_API_VERSION))
1067 # OS Scripts dictionary, we will populate it with the actual script names
1068 os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
# Each script must exist, be a regular file and be owner-executable.
1070 for script in os_scripts:
1071 os_scripts[script] = os.path.sep.join([os_dir, script])
1074 st = os.stat(os_scripts[script])
1075 except EnvironmentError, err:
1076 raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1077 (script, _ErrnoOrStr(err)))
1079 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1080 raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1083 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1084 raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1088 return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1089 create_script=os_scripts['create'],
1090 export_script=os_scripts['export'],
1091 import_script=os_scripts['import'],
1092 rename_script=os_scripts['rename'],
1093 api_version=api_version)
# NOTE(review): the leading dev_type dispatch (`if disk.dev_type == ...`) is
# elided from this listing; code kept byte-identical.
1096 def SnapshotBlockDevice(disk):
1097 """Create a snapshot copy of a block device.
1099 This function is called recursively, and the snapshot is actually created
1100 just for the leaf lvm backend device.
1103 disk: the disk to be snapshotted
1106 a config entry for the actual lvm device snapshotted.
1110 if len(disk.children) == 1:
1111 # only one child, let's recurse on it
1112 return SnapshotBlockDevice(disk.children[0])
1114 # more than one child, choose one that matches
1115 for child in disk.children:
1116 if child.size == disk.size:
1117 # return implies breaking the loop
1118 return SnapshotBlockDevice(child)
1119 elif disk.dev_type == constants.LD_LV:
1120 r_dev = _RecursiveFindBD(disk)
1121 if r_dev is not None:
1122 # let's stay on the safe side and ask for the full size, for now
1123 return r_dev.Snapshot(disk.size)
1127 raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
1128 " '%s' of type '%s'" %
1129 (disk.unique_id, disk.dev_type))
# NOTE(review): `comprcmd` definition, remote command arguments and the return
# are elided from this listing.
1132 def ExportSnapshot(disk, dest_node, instance):
1133 """Export a block device snapshot to a remote node.
1136 disk: the snapshot block device
1137 dest_node: the node to send the image to
1138 instance: instance being exported
1141 True if successful, False otherwise.
1144 inst_os = OSFromDisk(instance.os)
1145 export_script = inst_os.export_script
1147 logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1148 instance.name, int(time.time()))
1149 if not os.path.exists(constants.LOG_OS_DIR):
1150 os.mkdir(constants.LOG_OS_DIR, 0750)
1152 real_os_dev = _RecursiveFindBD(disk)
1153 if real_os_dev is None:
1154 raise errors.BlockDeviceError("Block device '%s' is not set up" %
# Destination is staged in "<name>.new" and finalized later (FinalizeExport).
1158 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1159 destfile = disk.physical_id[1]
1161 # the target command is built out of three individual commands,
1162 # which are joined by pipes; we check each individual command for
1165 expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1166 export_script, instance.name,
1167 real_os_dev.dev_path, logfile)
1171 destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1172 destdir, destdir, destfile)
1173 remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,
1176 # all commands have been checked, so we're safe to combine them
1177 command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1179 result = utils.RunCmd(command)
1182 logger.Error("os snapshot export command '%s' returned error: %s"
1184 (command, result.fail_reason, result.output))
# NOTE(review): directory-creation lines, the disk-size set, config write
# (cfo close) and return are elided from this listing.
1190 def FinalizeExport(instance, snap_disks):
1191 """Write out the export configuration information.
1194 instance: instance configuration
1195 snap_disks: snapshot block devices
1198 False in case of error, True otherwise.
1201 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1202 finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
1204 config = objects.SerializableConfigParser()
# Export section: metadata about the export operation itself.
1206 config.add_section(constants.INISECT_EXP)
1207 config.set(constants.INISECT_EXP, 'version', '0')
1208 config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
1209 config.set(constants.INISECT_EXP, 'source', instance.primary_node)
1210 config.set(constants.INISECT_EXP, 'os', instance.os)
1211 config.set(constants.INISECT_EXP, 'compression', 'gzip')
# Instance section: the instance's own configuration.
1213 config.add_section(constants.INISECT_INS)
1214 config.set(constants.INISECT_INS, 'name', instance.name)
1215 config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
1216 config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
1217 config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
1220 for nic_count, nic in enumerate(instance.nics):
1221 config.set(constants.INISECT_INS, 'nic%d_mac' %
1222 nic_count, '%s' % nic.mac)
1223 config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
1224 config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
1225 # TODO: redundant: on load can read nics until it doesn't exist
# NOTE(review): stores the last enumerate index, not the count — looks
# off-by-one unless the loader compensates; left unchanged here.
1226 config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)
1229 for disk_count, disk in enumerate(snap_disks):
1230 config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
1231 ('%s' % disk.iv_name))
1232 config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
1233 ('%s' % disk.physical_id[1]))
1234 config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
1236 config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)
1238 cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
1239 cfo = open(cff, 'w')
# Atomically replace any previous export with the freshly staged one.
1245 shutil.rmtree(finaldestdir, True)
1246 shutil.move(destdir, finaldestdir)
# NOTE(review): the config.read() call and both return paths are elided from
# this listing.
1251 def ExportInfo(dest):
1252 """Get export configuration information.
1255 dest: directory containing the export
1258 A serializable config file containing the export info.
1261 cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
1263 config = objects.SerializableConfigParser()
# Both mandatory sections must be present for the export to be valid.
1266 if (not config.has_section(constants.INISECT_EXP) or
1267 not config.has_section(constants.INISECT_INS)):
# NOTE(review): `comprcmd` definition, remote command arguments, return
# statements and the os-device Open() are elided from this listing.
1273 def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1274 """Import an os image into an instance.
1277 instance: the instance object
1278 os_disk: the instance-visible name of the os device
1279 swap_disk: the instance-visible name of the swap device
1280 src_node: node holding the source image
1281 src_image: path to the source image on src_node
1284 False in case of error, True otherwise.
1287 inst_os = OSFromDisk(instance.os)
1288 import_script = inst_os.import_script
1290 os_device = instance.FindDisk(os_disk)
1291 if os_device is None:
1292 logger.Error("Can't find this device-visible name '%s'" % os_disk)
1295 swap_device = instance.FindDisk(swap_disk)
1296 if swap_device is None:
1297 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1300 real_os_dev = _RecursiveFindBD(os_device)
1301 if real_os_dev is None:
1302 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1306 real_swap_dev = _RecursiveFindBD(swap_device)
1307 if real_swap_dev is None:
1308 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1310 real_swap_dev.Open()
1312 logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1313 instance.name, int(time.time()))
1314 if not os.path.exists(constants.LOG_OS_DIR):
1315 os.mkdir(constants.LOG_OS_DIR, 0750)
# Pipeline: remote cat of the image | decompression | local import script.
1317 destcmd = utils.BuildShellCmd('cat %s', src_image)
1318 remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,
1322 impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1323 inst_os.path, import_script, instance.name,
1324 real_os_dev.dev_path, real_swap_dev.dev_path,
1327 command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1329 result = utils.RunCmd(command)
1332 logger.Error("os import command '%s' returned error: %s"
1334 (command, result.fail_reason, result.output))
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns:
    list of the visible entries in the export directory, or an empty
    list if the directory does not exist.

  """
  if os.path.isdir(constants.EXPORT_DIR):
    return utils.ListVisibleFiles(constants.EXPORT_DIR)
  else:
    # no export directory means no exports
    return []
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  target = os.path.join(constants.EXPORT_DIR, export)

  # catch filesystem errors and report failure to the caller instead of
  # letting the exception escape the RPC handler
  try:
    shutil.rmtree(target)
  except EnvironmentError as err:
    logger.Error("Error while removing the export '%s': %s" %
                 (export, err))
    return False

  return True
def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  Args:
    devlist: list of tuples (disk, new_unique_id), where disk is a Disk
      object and new_unique_id is the new unique_id to give the device

  Returns:
    Combined boolean result (True only if all renames succeeded).

  """
  result = True
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      # device not set up on this node, cannot rename it
      result = False
      continue
    try:
      old_rpath = dev.dev_path
      dev.Rename(unique_id)
      new_rpath = dev.dev_path
      if old_rpath != new_rpath:
        # the device path changed, so the old cache entry is stale
        DevCacheManager.RemoveCache(old_rpath)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError as err:
      logger.Error("Can't rename device '%s' to '%s': %s" %
                   (dev, unique_id, err))
      result = False
  return result
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = ssconf.SimpleStore().GetFileStorageDir()
  # NOTE: os.path.commonprefix compares character by character, so a
  # sibling such as "<base>-evil" would wrongly pass that test; compare
  # on a path-component boundary instead
  if (file_storage_dir != base_file_storage_dir and
      not file_storage_dir.startswith(base_file_storage_dir + os.sep)):
    logger.Error("file storage directory '%s' is not under base file"
                 " storage directory '%s'" %
                 (file_storage_dir, base_file_storage_dir))
    return None
  return file_storage_dir
def CreateFileStorageDir(file_storage_dir):
  """Create file storage directory.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    creation was successful or not

  """
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
  if not file_storage_dir:
    # path outside the allowed base directory (already logged)
    return (False,)
  if os.path.exists(file_storage_dir):
    if not os.path.isdir(file_storage_dir):
      logger.Error("'%s' is not a directory" % file_storage_dir)
      return (False,)
    # already present and a directory: nothing to do
    return (True,)
  try:
    os.makedirs(file_storage_dir, 0o750)
  except OSError as err:
    logger.Error("Cannot create file storage directory '%s': %s" %
                 (file_storage_dir, err))
    return (False,)
  return (True,)
def RemoveFileStorageDir(file_storage_dir):
  """Remove file storage directory.

  Remove it only if it's empty. If not log an error and return.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    removal was successful or not

  """
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
  if not file_storage_dir:
    # path outside the allowed base directory (already logged)
    return (False,)
  if not os.path.exists(file_storage_dir):
    # nothing there: treat as already removed
    return (True,)
  if not os.path.isdir(file_storage_dir):
    logger.Error("'%s' is not a directory" % file_storage_dir)
    return (False,)
  # deletes dir only if empty, otherwise we want to return False
  try:
    os.rmdir(file_storage_dir)
  except OSError as err:
    logger.Error("Cannot remove file storage directory '%s': %s" %
                 (file_storage_dir, err))
    return (False,)
  return (True,)
def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
  """Rename the file storage directory.

  Args:
    old_file_storage_dir: string containing the old path
    new_file_storage_dir: string containing the new path

  Returns:
    tuple with first element a boolean indicating whether dir
    rename was successful or not

  """
  old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
  new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
  if not old_file_storage_dir or not new_file_storage_dir:
    # one of the paths is outside the allowed base (already logged)
    return (False,)
  if os.path.exists(new_file_storage_dir):
    if os.path.exists(old_file_storage_dir):
      # both locations exist: refuse to guess which one to keep
      # (BUGFIX: the format arguments must be a parenthesized tuple,
      # otherwise this very error path raises a TypeError)
      logger.Error("Cannot rename '%s' to '%s'. Both locations exist." %
                   (old_file_storage_dir, new_file_storage_dir))
      return (False,)
    # only the destination exists: assume the rename already happened
    return (True,)
  if not os.path.isdir(old_file_storage_dir):
    logger.Error("'%s' is not a directory" % old_file_storage_dir)
    return (False,)
  try:
    os.rename(old_file_storage_dir, new_file_storage_dir)
  except OSError as err:
    logger.Error("Cannot rename '%s' to '%s': %s"
                 % (old_file_storage_dir, new_file_storage_dir, err))
    return (False,)
  return (True,)
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only scripts whose name is plain alphanumerics/dash/underscore are
  # executed (skips editor backups, package leftovers, etc.)
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  @staticmethod
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
      - script: the full path to the script
      - env: the environment with which to exec the script

    Returns:
      tuple of (success boolean, first 4kB of combined stdout/stderr)

    """
    # exec the process using subprocess and log the output
    fdstdin = None
    try:
      fdstdin = open("/dev/null", "r")
      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, close_fds=True,
                               shell=False, cwd="/", env=env)
      # initialize before the read so the except clause below cannot
      # reference an unbound name
      output = ""
      try:
        output = child.stdout.read(4096)
        child.stdout.close()
      except EnvironmentError as err:
        output += "Hook script error: %s" % str(err)

      while True:
        try:
          result = child.wait()
          break
        except EnvironmentError as err:
          if err.errno == errno.EINTR:
            # wait() was interrupted by a signal; just retry it
            continue
          raise
    finally:
      # try not to leak fds
      for fd in (fdstdin, ):
        if fd is not None:
          try:
            fd.close()
          except EnvironmentError:
            # just log the error
            #logger.Error("While closing fd %s: %s" % (fd, err))
            pass

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    Args:
      - hpath: the hooks path (e.g. the LU hook path)
      - phase: one of the HOOKS_PHASE_* constants
      - env: environment dictionary passed to each script

    Returns:
      list of (relative script path, HKR_* status, output) tuples

    Raises:
      errors.ProgrammerError: if phase is not a known hooks phase

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    rr = []

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
    try:
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError:
      # must log; a missing hooks dir simply means no hooks to run
      return rr

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    dir_contents.sort()
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
              self.RE_MASK.match(relname) is not None):
        # not a regular executable with an acceptable name: skip it
        rrval = constants.HKR_SKIP
        output = ""
      else:
        result, output = self.ExecHook(fname, env)
        if not result:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))

    return rr
class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    Args:
      - name: the name of the allocator script to look up
      - idata: the allocator input data, written to a temp file and
        passed to the script as its single argument

    Return value: tuple of:
       - run status (one of the IARUN_ constants)
       - stdout of the script (None on missing script)
       - stderr of the script (None on missing script)
       - fail reason (as from utils.RunResult; None unless it failed)

    """
    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      # hand the input to the script via a temp file; always clean it up
      os.write(fd, idata)
      os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name])
      if result.failed:
        return (constants.IARUN_FAILURE, result.stdout, result.stderr,
                result.fail_reason)
    finally:
      os.unlink(fin_name)

    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
class DevCacheManager(object):
  """Simple class for managing a cache of block device information.

  """
  _DEV_PREFIX = "/dev/"
  _ROOT_DIR = constants.BDEV_CACHE_DIR

  @classmethod
  def _ConvertPath(cls, dev_path):
    """Converts a /dev/name path to the cache file name.

    This replaces slashes with underscores and strips the /dev
    prefix. It then returns the full path to the cache file.

    """
    if dev_path.startswith(cls._DEV_PREFIX):
      dev_path = dev_path[len(cls._DEV_PREFIX):]
    dev_path = dev_path.replace("/", "_")
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
    return fpath

  @classmethod
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
    """Updates the cache information for a given device.

    Args:
      - dev_path: the /dev/ path of the device
      - owner: the owner (instance name) of the device
      - on_primary: whether this is the primary node for the device
      - iv_name: the instance-visible name, or None if not visible

    """
    if dev_path is None:
      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    if on_primary:
      state = "primary"
    else:
      state = "secondary"
    if iv_name is None:
      iv_name = "not_visible"
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
    try:
      utils.WriteFile(fpath, data=fdata)
    except EnvironmentError as err:
      logger.Error("Can't update bdev cache for %s, error %s" %
                   (dev_path, str(err)))

  @classmethod
  def RemoveCache(cls, dev_path):
    """Remove data for a dev_path.

    Args:
      - dev_path: the /dev/ path of the device whose cache entry to drop

    """
    if dev_path is None:
      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    try:
      utils.RemoveFile(fpath)
    except EnvironmentError as err:
      # BUGFIX: this used to say "Can't update", copy-pasted from
      # UpdateCache, which misleads anyone reading the logs
      logger.Error("Can't remove bdev cache for %s, error %s" %
                   (dev_path, str(err)))