4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
34 from ganeti import logger
35 from ganeti import errors
36 from ganeti import utils
37 from ganeti import ssh
38 from ganeti import hypervisor
39 from ganeti import constants
40 from ganeti import bdev
41 from ganeti import objects
42 from ganeti import ssconf
46 return ssh.SshRunner()
50 """Activate local node as master node.
52 There are two needed steps for this:
53 - run the master script
54 - register the cron script
57 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
60 logger.Error("could not activate cluster interface with command %s,"
61 " error: '%s'" % (result.cmd, result.output))
68 """Deactivate this node as master.
70 This runs the master stop script.
73 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
76 logger.Error("could not deactivate cluster interface with command %s,"
77 " error: '%s'" % (result.cmd, result.output))
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
  # Node-side half of node addition: install the cluster-wide SSH host
  # keys and the ganeti user's SSH identity, then restart sshd.
  """Joins this node to the cluster.

  This does the following:
      - updates the hostkeys of the machine (rsa and dsa)
      - adds the ssh private key to the user
      - adds the ssh public key to the users' authorized_keys file

  """
  # Host keys: private keys get mode 0600, public keys are
  # world-readable (0644).
  sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
               (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
               (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
               (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
  for name, content, mode in sshd_keys:
    utils.WriteFile(name, data=content, mode=mode)

    # Locate the ganeti run-as user's key/authorized_keys paths
    # (enclosing try: and the call's remaining arguments are on lines
    # not visible in this view).
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
  except errors.OpExecError, err:
    logger.Error("Error while processing user ssh files: %s" % err)

  # Install the user's SSH key pair supplied by the master.
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
    utils.WriteFile(name, data=content, mode=0600)

  utils.AddAuthorizedKey(auth_keys, sshpub)

  # Restart sshd so the new host keys take effect.
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
117 """Cleans up the current node and prepares it to be removed from the cluster.
120 if os.path.isdir(constants.DATA_DIR):
121 for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
122 full_name = os.path.join(constants.DATA_DIR, rel_name)
123 if os.path.isfile(full_name) and not os.path.islink(full_name):
124 utils.RemoveFile(full_name)
127 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
128 except errors.OpExecError, err:
129 logger.Error("Error while processing ssh files: %s" % err)
132 f = open(pub_key, 'r')
134 utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
138 utils.RemoveFile(priv_key)
139 utils.RemoveFile(pub_key)
def GetNodeInfo(vgname):
  # NOTE(review): 'outputarray' is initialized on a line not visible in
  # this view; presumably an empty dict.
  """Gives back a hash with different informations about the node.

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }

    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB

  """
  # Volume group statistics (values may be None if the vg is missing,
  # per _GetVGInfo's contract).
  vginfo = _GetVGInfo(vgname)
  outputarray['vg_size'] = vginfo['vg_size']
  outputarray['vg_free'] = vginfo['vg_free']

  # Merge in hypervisor-provided data (the memory_* keys).
  hyper = hypervisor.GetHypervisor()
  hyp_info = hyper.GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)

  # Kernel boot id: changes on every reboot, letting the master detect
  # node restarts.
  f = open("/proc/sys/kernel/random/boot_id", 'r')
    outputarray["bootid"] = f.read(128).rstrip("\n")
def VerifyNode(what):
  # NOTE(review): the 'result' dict is initialized on a line not
  # visible in this view.
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
    - known_hosts key correct
    - correct resolving of node name (target node returns its own hostname
      by ssh-execution of 'hostname', result compared against name in list.

  """
  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    result['nodelist'] = {}
    for node in what['nodelist']:
      # message carries the failure description when success is False.
      success, message = _GetSshRunner().VerifyNodeHostname(node)
        result['nodelist'][node] = message
def GetVolumeList(vg_name):
  # NOTE(review): the separator 'sep' and the 'lvs' result dict are
  # defined on lines not visible in this view.
  """Compute list of logical volumes and their size.

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status:
    {'test1': ('20.06', True, True)}

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
    logger.Error("Failed to list logical volumes, lvs output: %s" %
  for line in result.stdout.splitlines():
    line = line.strip().rstrip(sep)
    name, size, attr = line.split(sep)
    # lv_attr positional flags: char 4 ('-' means not active), char 5
    # ('o' means the volume is open).
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  # Thin wrapper: the actual work is delegated to utils.
  return utils.ListVolumeGroups()
252 """List all volumes on this node.
255 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
257 "--options=lv_name,lv_size,devices,vg_name"])
259 logger.Error("Failed to list logical volumes, lvs output: %s" %
265 return dev.split('(')[0]
271 'name': line[0].strip(),
272 'size': line[1].strip(),
273 'dev': parse_dev(line[2].strip()),
274 'vg': line[3].strip(),
277 return [map_line(line.split('|')) for line in result.stdout.splitlines()]
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Returns:
    True if all of them exist, false otherwise

  """
  for bridge in bridges_list:
    # NOTE(review): the return statements of this function are on lines
    # not visible in this view.
    if not utils.BridgeExists(bridge):
def GetInstanceList():
  """Provides a list of instances.

  Returns:
    A list of all running instances on the current node
    - instance1.example.com
    - instance2.example.com

  """
    # Hypervisor enumeration (enclosing try: is on a line not visible
    # in this view).
    names = hypervisor.GetHypervisor().ListInstances()
  except errors.HypervisorError, err:
    logger.Error("error enumerating instances: %s" % str(err))
def GetInstanceInfo(instance):
  # NOTE(review): the 'output' dict is initialized on a line not
  # visible in this view.
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }

    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is not None:
    # Hypervisor info tuple positions 2/4/5 map to the memory, state
    # and cpu-time values described in the docstring.
    output['memory'] = iinfo[2]
    output['state'] = iinfo[4]
    output['time'] = iinfo[5]
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }

    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  # Per-instance tuples from the hypervisor; the loop body that builds
  # the result dict is on lines not visible in this view.
  for name, inst_id, memory, vcpus, state, times in iinfo:
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an OS to an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # Translate the instance-visible disk names into disk objects; a
  # missing disk is logged (error return paths not visible here).
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)

  # Resolve the disk objects down to the actual kernel block devices;
  # both must already be assembled on this node.
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  # Per-run OS-create log file named after os, instance and timestamp.
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # NOTE(review): '&>' redirection is bash-specific.
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
  result = utils.RunCmd(command)
    logger.Error("os create command '%s' returned error: %s, logfile: %s,"
                 (command, result.fail_reason, logfile, result.output))
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
  """Run the OS rename script for an instance.

  Args:
    instance: the instance object
    old_name: the old name of the instance
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  """
  inst_os = OSFromDisk(instance.os)

  script = inst_os.rename_script

  # Translate the instance-visible disk names into disk objects.
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)

  # Both block devices must already be assembled on this node.
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  # Per-run rename log file (old and new name plus timestamp).
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                           instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # NOTE(review): '&>' redirection is bash-specific.
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
                                inst_os.path, script, old_name, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
  result = utils.RunCmd(command)
    # NOTE(review): message says "os create command" but this is the
    # rename path — should presumably say "os rename command".
    logger.Error("os create command '%s' returned error: %s"
                 (command, result.fail_reason, result.output))
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }

    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  If an error occurs during gathering of data, we return the same dict
  with keys all set to None.

  """
  # Default result: all keys mapped to None (the error-path value).
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])
    errmsg = "volume group %s not present" % vg_name
  # vgs prints one colon-separated record; strip the trailing separator
  # before splitting.
  valarr = retval.stdout.strip().rstrip(':').split(':')
      # Sizes are reported as floats in MiB; round to whole MiB.
      "vg_size": int(round(float(valarr[0]), 0)),
      "vg_free": int(round(float(valarr[1]), 0)),
      "pv_count": int(valarr[2]),
  except ValueError, err:
    logger.Error("Fail to parse vgs output: %s" % str(err))
    logger.Error("vgs output has the wrong number of fields (expected"
                 " three): %s" % str(valarr))
def _GatherBlockDevs(instance):
  # NOTE(review): the 'block_devices' list is initialized on a line not
  # visible in this view.
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  """
  for disk in instance.disks:
    device = _RecursiveFindBD(disk)
      # An unassembled disk is a hard error at startup time.
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
    block_devices.append((disk, device))
def StartInstance(instance, extra_args):
  """Start an instance.

  Args:
    instance - name of instance to start.

  """
  running_instances = GetInstanceList()

  # Instance already up: start is a no-op (branch body not visible
  # here; presumably an early successful return).
  if instance.name in running_instances:

  block_devices = _GatherBlockDevs(instance)
  hyper = hypervisor.GetHypervisor()

    hyper.StartInstance(instance, block_devices, extra_args)
  except errors.HypervisorError, err:
    logger.Error("Failed to start instance: %s" % err)
def ShutdownInstance(instance):
  """Shut an instance down.

  Args:
    instance - name of instance to shutdown.

  """
  running_instances = GetInstanceList()

  # Instance already down: nothing to do.
  if instance.name not in running_instances:

  hyper = hypervisor.GetHypervisor()
    # First try a graceful (non-forced) stop.
    hyper.StopInstance(instance)
  except errors.HypervisorError, err:
    logger.Error("Failed to stop instance: %s" % err)

  # test every 10secs for 2min
  for dummy in range(11):
    if instance.name not in GetInstanceList():
    # the shutdown did not succeed
    logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)

    # Graceful stop timed out: force-destroy the instance.
    hyper.StopInstance(instance, force=True)
  except errors.HypervisorError, err:
    logger.Error("Failed to stop instance: %s" % err)

  if instance.name in GetInstanceList():
    # NOTE(review): the format string contains '%s' but no argument is
    # applied, so the literal '%s' is logged instead of the instance
    # name — missing '% instance.name'.
    logger.Error("could not shutdown instance '%s' even by destroy")
def RebootInstance(instance, reboot_type, extra_args):
  """Reboot an instance.

  Args:
    instance - name of instance to reboot
    reboot_type - how to reboot [soft,hard,full]

  """
  running_instances = GetInstanceList()

  # Rebooting a stopped instance makes no sense.
  if instance.name not in running_instances:
    logger.Error("Cannot reboot instance that is not running")

  hyper = hypervisor.GetHypervisor()
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
      # Soft reboot: delegated entirely to the hypervisor.
      hyper.RebootInstance(instance)
    except errors.HypervisorError, err:
      logger.Error("Failed to soft reboot instance: %s" % err)
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
      # Hard reboot: full shutdown followed by a fresh start.
      ShutdownInstance(instance)
      StartInstance(instance, extra_args)
    except errors.HypervisorError, err:
      logger.Error("Failed to hard reboot instance: %s" % err)
    # Any other value (including 'full') is rejected here.
    raise errors.ParameterError("reboot_type invalid")
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  Args:
    disk: a ganeti.objects.Disk object
    size: the size of the physical underlying device
    owner: a string with the name of the instance
    on_primary: a boolean indicating if it is the primary node or not
    info: string that will be sent to the physical device creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  """
  # First recursively assemble/create the children of this device.
  for child in disk.children:
    crdev = _RecursiveAssembleBD(child, owner, on_primary)
    if on_primary or disk.AssembleOnSecondary():
      # we need the children open in case the device itself has to
      # be assembled
      # Check for a stale pre-existing device and remove it if found.
      device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
      if device is not None:
        logger.Info("removing existing device %s" % disk)
      except errors.BlockDeviceError, err:
  device = bdev.Create(disk.dev_type, disk.physical_id,
  raise ValueError("Can't create child device for %s, %s" %
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logger.Error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    # Open read/write only where the device will actually be used.
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  # The unique_id may only be known after creation (e.g. dynamically
  # assigned identifiers).
  physical_id = device.unique_id
def RemoveBlockDevice(disk):
  """Remove a block device.

  This is intended to be called recursively.

  """
    # since we are removing the device, allow a partial match
    # this allows removal of broken mirrors
    rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError, err:
    # probably can't attach
    logger.Info("Can't attach to device %s in remove" % disk)
  # Capture the path before Remove() invalidates the device object,
  # so the cache entry can still be dropped.
  r_path = rdev.dev_path
  result = rdev.Remove()
    DevCacheManager.RemoveCache(r_path)
  # Recurse into children; overall result is the AND of all removals.
  for child in disk.children:
    result = result and RemoveBlockDevice(child)
def _RecursiveAssembleBD(disk, owner, as_primary):
  # NOTE(review): the 'children' list is initialized on a line not
  # visible in this view.
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)

  If the assembly is not successful, an exception is raised.

  """
  # mcn ends up as the maximum number of children allowed to fail
  # (i.e. be None) while the device can still be assembled.
  mcn = disk.ChildrenNeeded()
    mcn = 0 # max number of Nones allowed
    mcn = len(disk.children) - mcn # max number of Nones
  for chld_disk in disk.children:
      cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
    except errors.BlockDeviceError, err:
      # Tolerate child failures up to the mcn budget; beyond that the
      # error propagates.
      if children.count(None) >= mcn:
      logger.Debug("Error in child activation: %s" % str(err))
    children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    if as_primary or disk.OpenOnSecondary():
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  result = _RecursiveAssembleBD(disk, owner, as_primary)
  # On the primary the recursive call yields a BlockDev instance;
  # callers only need its /dev path.
  if isinstance(result, bdev.BlockDev):
    result = result.dev_path
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    # Capture the path before Shutdown() so the cache entry can still
    # be invalidated afterwards.
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
      DevCacheManager.RemoveCache(r_path)
  # Recurse into children; overall result is the AND of all shutdowns.
  for child in disk.children:
    result = result and ShutdownBlockDevice(child)
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  """
  # Partial match allowed: the parent may be a broken mirror that we
  # are in the process of repairing.
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
  # All new children must resolve to actual devices before attaching.
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if new_bdevs.count(None) > 0:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
  parent_bdev.AddChildren(new_bdevs)
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  # NOTE(review): the 'devs' list is initialized on a line not visible
  # in this view.
  """Shrink a mirrored block device.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
  # Collect the /dev paths of the children to detach; devices with a
  # static path don't need a dynamic lookup.
  for disk in new_cdevs:
    rpath = disk.StaticDevPath()
      bd = _RecursiveFindBD(disk)
        logger.Error("Can't find dynamic device %s while removing children" %
      devs.append(bd.dev_path)
  parent_bdev.RemoveChildren(devs)
def GetMirrorStatus(disks):
  # NOTE(review): the 'stats' list and the loop header over 'disks' are
  # on lines not visible in this view.
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  """
    rbd = _RecursiveFindBD(dsk)
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
    stats.append(rbd.CombinedSyncStatus())
def _RecursiveFindBD(disk, allow_partial=False):
  # NOTE(review): the 'children' list is initialized on a line not
  # visible in this view.
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # Resolve children first, then attach to the composite device.
  for chdisk in disk.children:
    children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance

  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  rbd = _RecursiveFindBD(disk)
  # Flatten path/major/minor plus the device's sync status tuple into
  # the single tuple documented above.
  return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  """
  # Only absolute paths are acceptable targets.
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
  # Whitelist of overwritable files: static config files plus whatever
  # the simple store declares.
    constants.CLUSTER_CONF_FILE,
    constants.SSH_KNOWN_HOSTS_FILE,
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)

  utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
                  atime=atime, mtime=mtime)
def _ErrnoOrStr(err):
  """Format an EnvironmentError exception.

  If the `err` argument has an errno attribute, it will be looked up
  and converted into a textual EXXXX description. Otherwise the string
  representation of the error will be returned.

  """
  # e.g. errno 2 -> 'ENOENT'; the fallback branch is on lines not
  # visible in this view.
  if hasattr(err, 'errno'):
    detail = errno.errorcode[err.errno]
def _OSSearch(name, search_path=None):
  """Search for OSes with the given name in the search_path.

  Args:
    name: The name of the OS to look for
    search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)

  Returns:
    The base_dir the OS resides in

  """
  if search_path is None:
    search_path = constants.OS_SEARCH_PATH

  # First directory containing a subdir named after the OS wins.
  for dir_name in search_path:
    t_os_dir = os.path.sep.join([dir_name, name])
    if os.path.isdir(t_os_dir):
def _OSOndiskVersion(name, os_dir):
  """Compute and return the API version of a given OS.

  This function will try to read the API version of the os given by
  the 'name' parameter and residing in the 'os_dir' directory.

  Return value will be either an integer denoting the version or None in the
  case when this is not a valid OS name.

  """
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

    # The version file must exist...
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))

  # ...and must be a regular file (not a symlink/dir/device).
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
    # Read at most 256 bytes; a valid version line is far shorter.
    api_version = f.read(256)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))

  api_version = api_version.strip()
    api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, os_dir,
                           "API version is not integer (%s)" % str(err))
def DiagnoseOS(top_dirs=None):
  # NOTE(review): the 'result' list is initialized on a line not
  # visible in this view.
  """Compute the validity for all OSes.

  Returns an OS object for each name in all the given top directories
  (if not given defaults to constants.OS_SEARCH_PATH)

  """
  if top_dirs is None:
    top_dirs = constants.OS_SEARCH_PATH

  for dir_name in top_dirs:
    if os.path.isdir(dir_name):
        f_names = utils.ListVisibleFiles(dir_name)
      except EnvironmentError, err:
        logger.Error("Can't list the OS directory %s: %s" %
                     (dir_name, str(err)))
      for name in f_names:
          os_inst = OSFromDisk(name, base_dir=dir_name)
          result.append(os_inst)
        except errors.InvalidOS, err:
          # An invalid OS is still reported, wrapped in a placeholder
          # OS object carrying the error details.
          result.append(objects.OS.FromInvalidOS(err))
def OSFromDisk(name, base_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    os_dir: Directory containing the OS scripts. Defaults to a search
            in all the OS_SEARCH_PATH directories.

  """
  if base_dir is None:
    base_dir = _OSSearch(name)

  if base_dir is None:
    raise errors.InvalidOS(name, None, "OS dir not found in search path")

  os_dir = os.path.sep.join([base_dir, name])
  api_version = _OSOndiskVersion(name, os_dir)

  # The on-disk OS must speak exactly our API version.
  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
                           " (found %s want %s)"
                           % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
                             (script, _ErrnoOrStr(err)))

    # Each script must be executable by its owner...
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
    # ...and a regular file.
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    rename_script=os_scripts['rename'],
                    api_version=api_version)
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted.

  """
  if len(disk.children) == 1:
    # only one child, let's recurse on it
    return SnapshotBlockDevice(disk.children[0])
  # more than one child, choose one that matches
  for child in disk.children:
    if child.size == disk.size:
      # return implies breaking the loop
      return SnapshotBlockDevice(child)
  elif disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
  # Anything that is neither a mirrored device nor an LV leaf cannot
  # be snapshotted.
  raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                               " '%s' of type '%s'" %
                               (disk.unique_id, disk.dev_type))
def ExportSnapshot(disk, dest_node, instance):
  # NOTE(review): 'comprcmd' (the compression stage of the pipeline) is
  # defined on a line not visible in this view.
  """Export a block device snapshot to a remote node.

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.

  """
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  # Per-run export log named after os, instance and timestamp.
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  # Staging area on the destination: <EXPORT_DIR>/<instance>.new
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # validity
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,

  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command)

    logger.Error("os snapshot export command '%s' returned error: %s"
                 (command, result.fail_reason, result.output))
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  # Write into the ".new" staging dir, then atomically swap it in.
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  # Export-level metadata section.
  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  # Instance-level metadata section.
  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE(review): 'nic_count' stores the last enumerate index, not the
  # number of nics (off by one for non-empty lists) — confirm readers
  # expect this; same applies to 'disk_count' below.
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  # Serialize the config into the staging directory (write/close lines
  # not visible in this view), then replace any previous export.
  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')

  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info.

  """
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  # An export without both the export and instance sections is invalid
  # (the read call and the error branch are on lines not visible here).
  if (not config.has_section(constants.INISECT_EXP) or
      not config.has_section(constants.INISECT_INS)):
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  # NOTE(review): 'comprcmd' (the decompression pipeline stage) is
  # defined on a line not visible in this view.
  """Import an os image into an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  """
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  # Translate the instance-visible disk names into disk objects.
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)

  # Both target block devices must already be assembled locally.
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  real_swap_dev.Open()

  # Per-run import log named after os, instance and timestamp.
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # Pipeline: remote cat of the image | decompress | local import
  # script.  NOTE(review): '&>' redirection is bash-specific.
  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,

  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

    logger.Error("os import command '%s' returned error: %s"
                 (command, result.fail_reason, result.output))
1360 """Return a list of exports currently available on this machine.
1363 if os.path.isdir(constants.EXPORT_DIR):
1364 return utils.ListVisibleFiles(constants.EXPORT_DIR)
1369 def RemoveExport(export):
1370 """Remove an existing export from the node.
1373 export: the name of the export to remove
1376 False in case of error, True otherwise.
1379 target = os.path.join(constants.EXPORT_DIR, export)
1381 shutil.rmtree(target)
1382 # TODO: catch some of the relevant exceptions and provide a pretty
1383 # error message if rmtree fails.
1388 def RenameBlockDevices(devlist):
1389 """Rename a list of block devices.
1391 The devlist argument is a list of tuples (disk, new_logical,
1392 new_physical). The return value will be a combined boolean result
1393 (True only if all renames succeeded).
1397 for disk, unique_id in devlist:
1398 dev = _RecursiveFindBD(disk)
1403 old_rpath = dev.dev_path
1404 dev.Rename(unique_id)
1405 new_rpath = dev.dev_path
1406 if old_rpath != new_rpath:
1407 DevCacheManager.RemoveCache(old_rpath)
1408 # FIXME: we should add the new cache information here, like:
1409 # DevCacheManager.UpdateCache(new_rpath, owner, ...)
1410 # but we don't have the owner here - maybe parse from existing
1411 # cache? for now, we only lose lvm data when we rename, which
1412 # is less critical than DRBD or MD
1413 except errors.BlockDeviceError, err:
1414 logger.Error("Can't rename device '%s' to '%s': %s" %
1415 (dev, unique_id, err))
1420 class HooksRunner(object):
1423 This class is instantiated on the node side (ganeti-noded) and not on
1427 RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1429 def __init__(self, hooks_base_dir=None):
1430 """Constructor for hooks runner.
1433 - hooks_base_dir: if not None, this overrides the
1434 constants.HOOKS_BASE_DIR (useful for unittests)
1435 - logs_base_dir: if not None, this overrides the
1436 constants.LOG_HOOKS_DIR (useful for unittests)
1437 - logging: enable or disable logging of script output
1440 if hooks_base_dir is None:
1441 hooks_base_dir = constants.HOOKS_BASE_DIR
1442 self._BASE_DIR = hooks_base_dir
1445 def ExecHook(script, env):
1446 """Exec one hook script.
1450 - script: the full path to the script
1451 - env: the environment with which to exec the script
1454 # exec the process using subprocess and log the output
1457 fdstdin = open("/dev/null", "r")
1458 child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1459 stderr=subprocess.STDOUT, close_fds=True,
1460 shell=False, cwd="/", env=env)
1463 output = child.stdout.read(4096)
1464 child.stdout.close()
1465 except EnvironmentError, err:
1466 output += "Hook script error: %s" % str(err)
1470 result = child.wait()
1472 except EnvironmentError, err:
1473 if err.errno == errno.EINTR:
1477 # try not to leak fds
1478 for fd in (fdstdin, ):
1482 except EnvironmentError, err:
1483 # just log the error
1484 #logger.Error("While closing fd %s: %s" % (fd, err))
1487 return result == 0, output
1489 def RunHooks(self, hpath, phase, env):
1490 """Run the scripts in the hooks directory.
1492 This method will not be usually overriden by child opcodes.
1495 if phase == constants.HOOKS_PHASE_PRE:
1497 elif phase == constants.HOOKS_PHASE_POST:
1500 raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
1503 subdir = "%s-%s.d" % (hpath, suffix)
1504 dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1506 dir_contents = utils.ListVisibleFiles(dir_name)
1507 except OSError, err:
1511 # we use the standard python sort order,
1512 # so 00name is the recommended naming scheme
1514 for relname in dir_contents:
1515 fname = os.path.join(dir_name, relname)
1516 if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1517 self.RE_MASK.match(relname) is not None):
1518 rrval = constants.HKR_SKIP
1521 result, output = self.ExecHook(fname, env)
1523 rrval = constants.HKR_FAIL
1525 rrval = constants.HKR_SUCCESS
1526 rr.append(("%s/%s" % (subdir, relname), rrval, output))
1531 class DevCacheManager(object):
1532 """Simple class for managing a cache of block device information.
1535 _DEV_PREFIX = "/dev/"
1536 _ROOT_DIR = constants.BDEV_CACHE_DIR
1539 def _ConvertPath(cls, dev_path):
1540 """Converts a /dev/name path to the cache file name.
1542 This replaces slashes with underscores and strips the /dev
1543 prefix. It then returns the full path to the cache file
1546 if dev_path.startswith(cls._DEV_PREFIX):
1547 dev_path = dev_path[len(cls._DEV_PREFIX):]
1548 dev_path = dev_path.replace("/", "_")
1549 fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1553 def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1554 """Updates the cache information for a given device.
1557 if dev_path is None:
1558 logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1560 fpath = cls._ConvertPath(dev_path)
1566 iv_name = "not_visible"
1567 fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1569 utils.WriteFile(fpath, data=fdata)
1570 except EnvironmentError, err:
1571 logger.Error("Can't update bdev cache for %s, error %s" %
1572 (dev_path, str(err)))
1575 def RemoveCache(cls, dev_path):
1576 """Remove data for a dev_path.
1579 if dev_path is None:
1580 logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1582 fpath = cls._ConvertPath(dev_path)
1584 utils.RemoveFile(fpath)
1585 except EnvironmentError, err:
1586 logger.Error("Can't update bdev cache for %s, error %s" %
1587 (dev_path, str(err)))