4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
35 from ganeti import logger
36 from ganeti import errors
37 from ganeti import utils
38 from ganeti import ssh
39 from ganeti import hypervisor
40 from ganeti import constants
41 from ganeti import bdev
42 from ganeti import objects
43 from ganeti import ssconf
47 return ssh.SshRunner()
51 """Activate local node as master node.
53 There are two needed steps for this:
54 - run the master script
55 - register the cron script
58 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
61 logger.Error("could not activate cluster interface with command %s,"
62 " error: '%s'" % (result.cmd, result.output))
69 """Deactivate this node as master.
71 This runs the master stop script.
74 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
77 logger.Error("could not deactivate cluster interface with command %s,"
78 " error: '%s'" % (result.cmd, result.output))
84 def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
85 """Joins this node to the cluster.
87 This does the following:
88 - updates the hostkeys of the machine (rsa and dsa)
89 - adds the ssh private key to the user
90 - adds the ssh public key to the users' authorized_keys file
93 sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
94 (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
95 (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
96 (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
97 for name, content, mode in sshd_keys:
98 utils.WriteFile(name, data=content, mode=mode)
101 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
103 except errors.OpExecError, err:
104 logger.Error("Error while processing user ssh files: %s" % err)
107 for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
108 utils.WriteFile(name, data=content, mode=0600)
110 utils.AddAuthorizedKey(auth_keys, sshpub)
112 utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
118 """Cleans up the current node and prepares it to be removed from the cluster.
121 if os.path.isdir(constants.DATA_DIR):
122 for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
123 full_name = os.path.join(constants.DATA_DIR, rel_name)
124 if os.path.isfile(full_name) and not os.path.islink(full_name):
125 utils.RemoveFile(full_name)
128 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
129 except errors.OpExecError, err:
130 logger.Error("Error while processing ssh files: %s" % err)
133 f = open(pub_key, 'r')
135 utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
139 utils.RemoveFile(priv_key)
140 utils.RemoveFile(pub_key)
142 # Return a reassuring string to the caller, and quit
143 raise errors.QuitGanetiException(False, 'Shutdown scheduled')
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.

  Returns a dict with (at least):
    { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_dom0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }
  where vg_size/vg_free are the size/free space of the given volume
  group in MiB, the memory values come from the hypervisor, and
  'bootid' is the kernel boot id (changes on every reboot).

  """
  outputarray = {}

  vginfo = _GetVGInfo(vgname)
  outputarray['vg_size'] = vginfo['vg_size']
  outputarray['vg_free'] = vginfo['vg_free']

  hyp_info = hypervisor.GetHypervisor().GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)

  # the boot id lets the master detect node reboots
  f = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    outputarray["bootid"] = f.read(128).rstrip("\n")
  finally:
    f.close()

  return outputarray
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check ssh communication with
      'node-net-test': list of (name, primary_ip, secondary_ip) tuples
        for a TCP connectivity test on the node daemon port
      'hypervisor': run the hypervisor-specific verify

  Returns: a dict with one entry per requested check.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    result['nodelist'] = {}
    # FIX: shuffle a copy instead of mutating the caller's list in place
    nodelist = list(what['nodelist'])
    random.shuffle(nodelist)
    for node in nodelist:
      success, message = _GetSshRunner().VerifyNodeHostname(node)
      if not success:
        result['nodelist'][node] = message

  if 'node-net-test' in what:
    result['node-net-test'] = {}
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
    for name, pip, sip in what['node-net-test']:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      result['node-net-test'][my_name] = ("Can't find my own"
                                          " primary/secondary IP"
                                          " in the node list")
    else:
      port = ssconf.SimpleStore().GetNodeDaemonPort()
      for name, pip, sip in what['node-net-test']:
        fail = []
        if not utils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          if not utils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          result['node-net-test'][name] = ("failure using the %s"
                                           " interface(s)" %
                                           " and ".join(fail))

  return result
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Returns: dictionary of all partitions (key) with their size in MiB
  plus inactive/online flags, e.g. {'test1': ('20.06', True, True)};
  on lvs failure, the command output is returned instead.

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return result.output

  for line in result.stdout.splitlines():
    line = line.strip().rstrip(sep)
    fields = line.split(sep)
    if len(fields) != 3:
      # FIX: log and skip malformed lines instead of crashing on the
      # tuple unpacking
      logger.Error("Invalid line returned from lvs output: '%s'" % line)
      continue
    name, size, attr = fields
    if len(attr) < 6:
      # FIX: guard against short attr strings before indexing them;
      # treat such volumes as inactive and offline
      inactive = True
      online = False
    else:
      inactive = attr[4] == '-'
      online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns: dictionary with keys volume name and values the size of the
  volume.

  """
  # the actual work is shared with the rest of the code base
  return utils.ListVolumeGroups()
284 """List all volumes on this node.
287 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
289 "--options=lv_name,lv_size,devices,vg_name"])
291 logger.Error("Failed to list logical volumes, lvs output: %s" %
297 return dev.split('(')[0]
303 'name': line[0].strip(),
304 'size': line[1].strip(),
305 'dev': parse_dev(line[2].strip()),
306 'vg': line[3].strip(),
309 return [map_line(line.split('|')) for line in result.stdout.splitlines()]
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Returns: True if all of them exist, False otherwise.

  """
  # all() short-circuits on the first missing bridge
  return all(utils.BridgeExists(bridge) for bridge in bridges_list)
326 def GetInstanceList():
327 """Provides a list of instances.
330 A list of all running instances on the current node
331 - instance1.example.com
332 - instance2.example.com
336 names = hypervisor.GetHypervisor().ListInstances()
337 except errors.HypervisorError, err:
338 logger.Error("error enumerating instances: %s" % str(err))
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where memory is the memory size (int), state the hypervisor state
    (string) and time the cpu time (float); empty dict if the
    hypervisor does not know the instance.

  """
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is None:
    return {}
  # iinfo layout per hypervisor: (name, id, memory, vcpus, state, time)
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
  each value like
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, 'vcpus': 1 }

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    for name, _, memory, vcpus, state, times in iinfo:
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return output
401 def AddOSToInstance(instance, os_disk, swap_disk):
402 """Add an OS to an instance.
405 instance: the instance object
406 os_disk: the instance-visible name of the os device
407 swap_disk: the instance-visible name of the swap device
410 inst_os = OSFromDisk(instance.os)
412 create_script = inst_os.create_script
414 os_device = instance.FindDisk(os_disk)
415 if os_device is None:
416 logger.Error("Can't find this device-visible name '%s'" % os_disk)
419 swap_device = instance.FindDisk(swap_disk)
420 if swap_device is None:
421 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
424 real_os_dev = _RecursiveFindBD(os_device)
425 if real_os_dev is None:
426 raise errors.BlockDeviceError("Block device '%s' is not set up" %
430 real_swap_dev = _RecursiveFindBD(swap_device)
431 if real_swap_dev is None:
432 raise errors.BlockDeviceError("Block device '%s' is not set up" %
436 logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
437 instance.name, int(time.time()))
438 if not os.path.exists(constants.LOG_OS_DIR):
439 os.mkdir(constants.LOG_OS_DIR, 0750)
441 command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
442 inst_os.path, create_script, instance.name,
443 real_os_dev.dev_path, real_swap_dev.dev_path,
446 result = utils.RunCmd(command)
448 logger.Error("os create command '%s' returned error: %s, logfile: %s,"
450 (command, result.fail_reason, logfile, result.output))
456 def RunRenameInstance(instance, old_name, os_disk, swap_disk):
457 """Run the OS rename script for an instance.
460 instance: the instance object
461 old_name: the old name of the instance
462 os_disk: the instance-visible name of the os device
463 swap_disk: the instance-visible name of the swap device
466 inst_os = OSFromDisk(instance.os)
468 script = inst_os.rename_script
470 os_device = instance.FindDisk(os_disk)
471 if os_device is None:
472 logger.Error("Can't find this device-visible name '%s'" % os_disk)
475 swap_device = instance.FindDisk(swap_disk)
476 if swap_device is None:
477 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
480 real_os_dev = _RecursiveFindBD(os_device)
481 if real_os_dev is None:
482 raise errors.BlockDeviceError("Block device '%s' is not set up" %
486 real_swap_dev = _RecursiveFindBD(swap_device)
487 if real_swap_dev is None:
488 raise errors.BlockDeviceError("Block device '%s' is not set up" %
492 logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
494 instance.name, int(time.time()))
495 if not os.path.exists(constants.LOG_OS_DIR):
496 os.mkdir(constants.LOG_OS_DIR, 0750)
498 command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
499 inst_os.path, script, old_name, instance.name,
500 real_os_dev.dev_path, real_swap_dev.dev_path,
503 result = utils.RunCmd(command)
506 logger.Error("os create command '%s' returned error: %s"
508 (command, result.fail_reason, result.output))
514 def _GetVGInfo(vg_name):
515 """Get informations about the volume group.
518 vg_name: the volume group
521 { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
523 vg_size is the total size of the volume group in MiB
524 vg_free is the free size of the volume group in MiB
525 pv_count are the number of physical disks in that vg
527 If an error occurs during gathering of data, we return the same dict
528 with keys all set to None.
531 retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
533 retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
534 "--nosuffix", "--units=m", "--separator=:", vg_name])
537 errmsg = "volume group %s not present" % vg_name
540 valarr = retval.stdout.strip().rstrip(':').split(':')
544 "vg_size": int(round(float(valarr[0]), 0)),
545 "vg_free": int(round(float(valarr[1]), 0)),
546 "pv_count": int(valarr[2]),
548 except ValueError, err:
549 logger.Error("Fail to parse vgs output: %s" % str(err))
551 logger.Error("vgs output has the wrong number of fields (expected"
552 " three): %s" % str(valarr))
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup.  The block
  devices must be already assembled.

  Returns: list of (disk, device) pairs; raises BlockDeviceError if a
  disk cannot be found.

  """
  block_devices = []
  for disk in instance.disks:
    device = _RecursiveFindBD(disk)
    if device is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    block_devices.append((disk, device))
  return block_devices
574 def StartInstance(instance, extra_args):
575 """Start an instance.
578 instance - name of instance to start.
581 running_instances = GetInstanceList()
583 if instance.name in running_instances:
586 block_devices = _GatherBlockDevs(instance)
587 hyper = hypervisor.GetHypervisor()
590 hyper.StartInstance(instance, block_devices, extra_args)
591 except errors.HypervisorError, err:
592 logger.Error("Failed to start instance: %s" % err)
598 def ShutdownInstance(instance):
599 """Shut an instance down.
602 instance - name of instance to shutdown.
605 running_instances = GetInstanceList()
607 if instance.name not in running_instances:
610 hyper = hypervisor.GetHypervisor()
612 hyper.StopInstance(instance)
613 except errors.HypervisorError, err:
614 logger.Error("Failed to stop instance: %s" % err)
617 # test every 10secs for 2min
621 for dummy in range(11):
622 if instance.name not in GetInstanceList():
626 # the shutdown did not succeed
627 logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
630 hyper.StopInstance(instance, force=True)
631 except errors.HypervisorError, err:
632 logger.Error("Failed to stop instance: %s" % err)
636 if instance.name in GetInstanceList():
637 logger.Error("could not shutdown instance '%s' even by destroy")
643 def RebootInstance(instance, reboot_type, extra_args):
644 """Reboot an instance.
647 instance - name of instance to reboot
648 reboot_type - how to reboot [soft,hard,full]
651 running_instances = GetInstanceList()
653 if instance.name not in running_instances:
654 logger.Error("Cannot reboot instance that is not running")
657 hyper = hypervisor.GetHypervisor()
658 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
660 hyper.RebootInstance(instance)
661 except errors.HypervisorError, err:
662 logger.Error("Failed to soft reboot instance: %s" % err)
664 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
666 ShutdownInstance(instance)
667 StartInstance(instance, extra_args)
668 except errors.HypervisorError, err:
669 logger.Error("Failed to hard reboot instance: %s" % err)
672 raise errors.ParameterError("reboot_type invalid")
678 def MigrateInstance(instance, target, live):
679 """Migrates an instance to another node.
682 hyper = hypervisor.GetHypervisor()
685 hyper.MigrateInstance(instance, target, live)
686 except errors.HypervisorError, err:
687 msg = "Failed to migrate instance: %s" % str(err)
690 return (True, "Migration successfull")
693 def CreateBlockDevice(disk, size, owner, on_primary, info):
694 """Creates a block device for an instance.
697 disk: a ganeti.objects.Disk object
698 size: the size of the physical underlying device
699 owner: a string with the name of the instance
700 on_primary: a boolean indicating if it is the primary node or not
701 info: string that will be sent to the physical device creation
704 the new unique_id of the device (this can sometime be
705 computed only after creation), or None. On secondary nodes,
706 it's not required to return anything.
711 for child in disk.children:
712 crdev = _RecursiveAssembleBD(child, owner, on_primary)
713 if on_primary or disk.AssembleOnSecondary():
714 # we need the children open in case the device itself has to
719 device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
720 if device is not None:
721 logger.Info("removing existing device %s" % disk)
723 except errors.BlockDeviceError, err:
726 device = bdev.Create(disk.dev_type, disk.physical_id,
729 raise ValueError("Can't create child device for %s, %s" %
731 if on_primary or disk.AssembleOnSecondary():
732 if not device.Assemble():
733 errorstring = "Can't assemble device after creation"
734 logger.Error(errorstring)
735 raise errors.BlockDeviceError("%s, very unusual event - check the node"
736 " daemon logs" % errorstring)
737 device.SetSyncSpeed(constants.SYNC_SPEED)
738 if on_primary or disk.OpenOnSecondary():
739 device.Open(force=True)
740 DevCacheManager.UpdateCache(device.dev_path, owner,
741 on_primary, disk.iv_name)
745 physical_id = device.unique_id
749 def RemoveBlockDevice(disk):
750 """Remove a block device.
752 This is intended to be called recursively.
756 # since we are removing the device, allow a partial match
757 # this allows removal of broken mirrors
758 rdev = _RecursiveFindBD(disk, allow_partial=True)
759 except errors.BlockDeviceError, err:
760 # probably can't attach
761 logger.Info("Can't attach to device %s in remove" % disk)
764 r_path = rdev.dev_path
765 result = rdev.Remove()
767 DevCacheManager.RemoveCache(r_path)
771 for child in disk.children:
772 result = result and RemoveBlockDevice(child)
776 def _RecursiveAssembleBD(disk, owner, as_primary):
777 """Activate a block device for an instance.
779 This is run on the primary and secondary nodes for an instance.
781 This function is called recursively.
784 disk: a objects.Disk object
785 as_primary: if we should make the block device read/write
788 the assembled device or None (in case no device was assembled)
790 If the assembly is not successful, an exception is raised.
795 mcn = disk.ChildrenNeeded()
797 mcn = 0 # max number of Nones allowed
799 mcn = len(disk.children) - mcn # max number of Nones
800 for chld_disk in disk.children:
802 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
803 except errors.BlockDeviceError, err:
804 if children.count(None) >= mcn:
807 logger.Debug("Error in child activation: %s" % str(err))
808 children.append(cdev)
810 if as_primary or disk.AssembleOnSecondary():
811 r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
812 r_dev.SetSyncSpeed(constants.SYNC_SPEED)
814 if as_primary or disk.OpenOnSecondary():
816 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
817 as_primary, disk.iv_name)
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns: a /dev path for primary nodes, True for secondary nodes.

  """
  result = _RecursiveAssembleBD(disk, owner, as_primary)
  if isinstance(result, bdev.BlockDev):
    # primary node path: expose the device path, not the object
    result = result.dev_path
  return result
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown.  Then the children of the device are shutdown.

  This function is called recursively.  Note that we don't cache the
  children or such, as opposed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
    if result:
      DevCacheManager.RemoveCache(r_path)
  else:
    # a device we can't find is already "down"
    result = True
  if disk.children:
    for child in disk.children:
      result = result and ShutdownBlockDevice(child)
  return result
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  Args:
    parent_cdev: the disk whose mirror is to be extended
    new_cdevs: the disks to add as new mirror components

  Returns: True on success, False if a device cannot be found.

  """
  # allow a partial match: the parent may be a broken mirror
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
    return False
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if None in new_bdevs:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  Args:
    parent_cdev: the disk whose mirror is to be shrunk
    new_cdevs: the disks to remove from the mirror

  Returns: True on success, False if a device cannot be found.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
    return False
  devs = []
  for disk in new_cdevs:
    rpath = disk.StaticDevPath()
    if rpath is None:
      # no static path: resolve the device dynamically
      bd = _RecursiveFindBD(disk)
      if bd is None:
        logger.Error("Can't find dynamic device %s while removing children" %
                     disk)
        return False
      devs.append(bd.dev_path)
    else:
      devs.append(rpath)
  parent_bdev.RemoveChildren(devs)
  return True
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns: list of (mirror_done, estimated_time) tuples, which are the
  result of bdev.BlockDevice.CombinedSyncStatus(); raises
  BlockDeviceError if any device cannot be found.

  """
  stats = []
  for dsk in disks:
    rbd = _RecursiveFindBD(dsk)
    if rbd is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
    stats.append(rbd.CombinedSyncStatus())
  return stats
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the device can't
      be found; this is intended to be used when repairing mirrors

  Returns: None if the device can't be found, otherwise the device
  instance.

  """
  children = []
  if disk.children:
    children = [_RecursiveFindBD(chdisk) for chdisk in disk.children]

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance

  Returns: None if the device can't be found, otherwise a tuple
  (device_path, major, minor, sync_percent, estimated_time, is_degraded).

  """
  rbd = _RecursiveFindBD(disk)
  if rbd is None:
    return None
  return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file.  It will only perform
  the operation if the file belongs to a list of configuration files.

  Returns: False if the name is not absolute or not whitelisted, True
  otherwise.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist of overwritable files; membership test via a set
  allowed_files = set([
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,  # NOTE(review): entry elided in extract - confirm
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.VNC_PASSWORD_FILE,
    ])
  allowed_files.update(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
                  atime=atime, mtime=mtime)
  return True
998 def _ErrnoOrStr(err):
999 """Format an EnvironmentError exception.
1001 If the `err` argument has an errno attribute, it will be looked up
1002 and converted into a textual EXXXX description. Otherwise the string
1003 representation of the error will be returned.
1006 if hasattr(err, 'errno'):
1007 detail = errno.errorcode[err.errno]
1013 def _OSOndiskVersion(name, os_dir):
1014 """Compute and return the API version of a given OS.
1016 This function will try to read the API version of the os given by
1017 the 'name' parameter and residing in the 'os_dir' directory.
1019 Return value will be either an integer denoting the version or None in the
1020 case when this is not a valid OS name.
1023 api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1026 st = os.stat(api_file)
1027 except EnvironmentError, err:
1028 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1029 " found (%s)" % _ErrnoOrStr(err))
1031 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1032 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1038 api_version = f.read(256)
1041 except EnvironmentError, err:
1042 raise errors.InvalidOS(name, os_dir, "error while reading the"
1043 " API version (%s)" % _ErrnoOrStr(err))
1045 api_version = api_version.strip()
1047 api_version = int(api_version)
1048 except (TypeError, ValueError), err:
1049 raise errors.InvalidOS(name, os_dir,
1050 "API version is not integer (%s)" % str(err))
1055 def DiagnoseOS(top_dirs=None):
1056 """Compute the validity for all OSes.
1058 Returns an OS object for each name in all the given top directories
1059 (if not given defaults to constants.OS_SEARCH_PATH)
1065 if top_dirs is None:
1066 top_dirs = constants.OS_SEARCH_PATH
1069 for dir_name in top_dirs:
1070 if os.path.isdir(dir_name):
1072 f_names = utils.ListVisibleFiles(dir_name)
1073 except EnvironmentError, err:
1074 logger.Error("Can't list the OS directory %s: %s" %
1075 (dir_name, str(err)))
1077 for name in f_names:
1079 os_inst = OSFromDisk(name, base_dir=dir_name)
1080 result.append(os_inst)
1081 except errors.InvalidOS, err:
1082 result.append(objects.OS.FromInvalidOS(err))
1087 def OSFromDisk(name, base_dir=None):
1088 """Create an OS instance from disk.
1090 This function will return an OS instance if the given name is a
1091 valid OS name. Otherwise, it will raise an appropriate
1092 `errors.InvalidOS` exception, detailing why this is not a valid
1096 os_dir: Directory containing the OS scripts. Defaults to a search
1097 in all the OS_SEARCH_PATH directories.
1101 if base_dir is None:
1102 os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
1104 raise errors.InvalidOS(name, None, "OS dir not found in search path")
1106 os_dir = os.path.sep.join([base_dir, name])
1108 api_version = _OSOndiskVersion(name, os_dir)
1110 if api_version != constants.OS_API_VERSION:
1111 raise errors.InvalidOS(name, os_dir, "API version mismatch"
1112 " (found %s want %s)"
1113 % (api_version, constants.OS_API_VERSION))
1115 # OS Scripts dictionary, we will populate it with the actual script names
1116 os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1118 for script in os_scripts:
1119 os_scripts[script] = os.path.sep.join([os_dir, script])
1122 st = os.stat(os_scripts[script])
1123 except EnvironmentError, err:
1124 raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1125 (script, _ErrnoOrStr(err)))
1127 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1128 raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1131 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1132 raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1136 return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1137 create_script=os_scripts['create'],
1138 export_script=os_scripts['export'],
1139 import_script=os_scripts['import'],
1140 rename_script=os_scripts['rename'],
1141 api_version=api_version)
1144 def GrowBlockDevice(disk, amount):
1145 """Grow a stack of block devices.
1147 This function is called recursively, with the childrens being the
1151 disk: the disk to be grown
1153 Returns: a tuple of (status, result), with:
1154 status: the result (true/false) of the operation
1155 result: the error message if the operation failed, otherwise not used
1158 r_dev = _RecursiveFindBD(disk)
1160 return False, "Cannot find block device %s" % (disk,)
1164 except errors.BlockDeviceError, err:
1165 return False, str(err)
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually
  created just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns: a config entry for the actual lvm device snapshotted.

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    # more than one child, choose one that matches
    for child in disk.children:
      if child.size == disk.size:
        # return implies breaking the loop
        return SnapshotBlockDevice(child)
  elif disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      return None
  raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                               " '%s' of type '%s'" %
                               (disk.unique_id, disk.dev_type))
1206 def ExportSnapshot(disk, dest_node, instance):
1207 """Export a block device snapshot to a remote node.
1210 disk: the snapshot block device
1211 dest_node: the node to send the image to
1212 instance: instance being exported
1215 True if successful, False otherwise.
1218 inst_os = OSFromDisk(instance.os)
1219 export_script = inst_os.export_script
1221 logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1222 instance.name, int(time.time()))
1223 if not os.path.exists(constants.LOG_OS_DIR):
1224 os.mkdir(constants.LOG_OS_DIR, 0750)
1226 real_os_dev = _RecursiveFindBD(disk)
1227 if real_os_dev is None:
1228 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1232 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1233 destfile = disk.physical_id[1]
1235 # the target command is built out of three individual commands,
1236 # which are joined by pipes; we check each individual command for
1239 expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1240 export_script, instance.name,
1241 real_os_dev.dev_path, logfile)
1245 destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1246 destdir, destdir, destfile)
1247 remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,
1250 # all commands have been checked, so we're safe to combine them
1251 command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1253 result = utils.RunCmd(command)
1256 logger.Error("os snapshot export command '%s' returned error: %s"
1258 (command, result.fail_reason, result.output))
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns: False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  # FIX: nic_count/disk_count were only bound inside the enumerate()
  # loops, raising NameError for instances without nics/disks; they are
  # now initialized first.  NOTE(review): the stored value is the last
  # enumerate index, not the total count - apparent off-by-one kept for
  # compatibility with the importer, which reads entries until one is
  # missing; confirm before changing.
  nic_count = 0
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count,
               '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  disk_count = 0
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
# ExportInfo: load and validate the config file of an existing export
# directory. The actual config.read() call and the return statements are
# not visible in this listing (lines missing) — presumably returns the
# parsed config on success and a false value when the required sections
# are absent; TODO confirm against the full source.
1325 def ExportInfo(dest):
1326 """Get export configuration information.
1329 dest: directory containing the export
1332 A serializable config file containing the export info.
1335 cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
1337 config = objects.SerializableConfigParser()
# Both the export-level and instance-level sections must be present for
# the export to be considered valid.
1340 if (not config.has_section(constants.INISECT_EXP) or
1341 not config.has_section(constants.INISECT_INS)):
# ImportOSIntoInstance: stream a compressed OS image from a remote node
# (cat over ssh | decompress | OS-specific import script) onto the
# instance's OS block device.
1347 def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1348 """Import an os image into an instance.
1351 instance: the instance object
1352 os_disk: the instance-visible name of the os device
1353 swap_disk: the instance-visible name of the swap device
1354 src_node: node holding the source image
1355 src_image: path to the source image on src_node
1358 False in case of error, True otherwise.
1361 inst_os = OSFromDisk(instance.os)
1362 import_script = inst_os.import_script
# Resolve the instance-visible disk names to disk objects, then to the
# actual local block devices. Missing devices are logged (os/swap) or
# raised (not-set-up block devices).
1364 os_device = instance.FindDisk(os_disk)
1365 if os_device is None:
1366 logger.Error("Can't find this device-visible name '%s'" % os_disk)
1369 swap_device = instance.FindDisk(swap_disk)
1370 if swap_device is None:
1371 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1374 real_os_dev = _RecursiveFindBD(os_device)
1375 if real_os_dev is None:
1376 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1380 real_swap_dev = _RecursiveFindBD(swap_device)
1381 if real_swap_dev is None:
1382 raise errors.BlockDeviceError("Block device '%s' is not set up" %
# Make sure the swap device node exists before the import script uses it.
1384 real_swap_dev.Open()
# Per-import log file named after OS, instance and timestamp.
1386 logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1387 instance.name, int(time.time()))
1388 if not os.path.exists(constants.LOG_OS_DIR):
1389 os.mkdir(constants.LOG_OS_DIR, 0750)
# Build the pipeline: remote 'cat' of the image over ssh, piped into a
# decompressor (comprcmd — its assignment is on a line missing from this
# listing; presumably gunzip to match the 'gzip' compression written by
# FinalizeExport, TODO confirm), piped into the OS import script.
1391 destcmd = utils.BuildShellCmd('cat %s', src_image)
1392 remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,
1396 impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1397 inst_os.path, import_script, instance.name,
1398 real_os_dev.dev_path, real_swap_dev.dev_path,
1401 command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1403 result = utils.RunCmd(command)
1406 logger.Error("os import command '%s' returned error: %s"
1408 (command, result.fail_reason, result.output))
# NOTE(review): the 'def ListExports():' line itself is missing from this
# listing — these lines are the body of that function. Returns the visible
# entries of EXPORT_DIR; the non-directory fallback is also not visible here.
1415 """Return a list of exports currently available on this machine.
1418 if os.path.isdir(constants.EXPORT_DIR):
1419 return utils.ListVisibleFiles(constants.EXPORT_DIR)
# RemoveExport: delete an export tree under EXPORT_DIR by name.
# The 'try:' guarding rmtree and the except/return lines are missing from
# this listing (see the TODO below, which the original author left).
1424 def RemoveExport(export):
1425 """Remove an existing export from the node.
1428 export: the name of the export to remove
1431 False in case of error, True otherwise.
1434 target = os.path.join(constants.EXPORT_DIR, export)
1436 shutil.rmtree(target)
1437 # TODO: catch some of the relevant exceptions and provide a pretty
1438 # error message if rmtree fails.
# RenameBlockDevices: rename each block device in devlist and keep the
# device cache consistent for the old path.
# NOTE(review): the docstring below says 3-tuples (disk, new_logical,
# new_physical) but the loop visibly unpacks 2-tuples (disk, unique_id) —
# the docstring appears stale relative to the code; TODO confirm callers.
1443 def RenameBlockDevices(devlist):
1444 """Rename a list of block devices.
1446 The devlist argument is a list of tuples (disk, new_logical,
1447 new_physical). The return value will be a combined boolean result
1448 (True only if all renames succeeded).
1452 for disk, unique_id in devlist:
1453 dev = _RecursiveFindBD(disk)
# Capture the device path before and after the rename so the stale cache
# entry can be dropped when the path actually changed.
1458 old_rpath = dev.dev_path
1459 dev.Rename(unique_id)
1460 new_rpath = dev.dev_path
1461 if old_rpath != new_rpath:
1462 DevCacheManager.RemoveCache(old_rpath)
1463 # FIXME: we should add the new cache information here, like:
1464 # DevCacheManager.UpdateCache(new_rpath, owner, ...)
1465 # but we don't have the owner here - maybe parse from existing
1466 # cache? for now, we only lose lvm data when we rename, which
1467 # is less critical than DRBD or MD
1468 except errors.BlockDeviceError, err:
1469 logger.Error("Can't rename device '%s' to '%s': %s" %
1470 (dev, unique_id, err))
# _TransformFileStorageDir: normalize a requested file-storage path and
# confirm it lives under the cluster-wide base directory from SimpleStore.
# The failure return (presumably None, per the docstring) is on a line
# missing from this listing.
1475 def _TransformFileStorageDir(file_storage_dir):
1476 """Checks whether given file_storage_dir is valid.
1478 Checks whether the given file_storage_dir is within the cluster-wide
1479 default file_storage_dir stored in SimpleStore. Only paths under that
1480 directory are allowed.
1483 file_storage_dir: string with path
1486 normalized file_storage_dir (string) if valid, None otherwise
1489 file_storage_dir = os.path.normpath(file_storage_dir)
1490 base_file_storage_dir = ssconf.SimpleStore().GetFileStorageDir()
# commonprefix must equal the base dir, i.e. the normalized path must be
# inside the base file storage directory.
1491 if (not os.path.commonprefix([file_storage_dir, base_file_storage_dir]) ==
1492 base_file_storage_dir):
1493 logger.Error("file storage directory '%s' is not under base file"
1494 " storage directory '%s'" %
1495 (file_storage_dir, base_file_storage_dir))
1497 return file_storage_dir
# CreateFileStorageDir: validate the path against the cluster base dir and
# create it (including parents, mode 0750). Several return lines and the
# 'try:' before makedirs are missing from this listing.
1500 def CreateFileStorageDir(file_storage_dir):
1501 """Create file storage directory.
1504 file_storage_dir: string containing the path
1507 tuple with first element a boolean indicating whether dir
1508 creation was successful or not
1511 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1513 if not file_storage_dir:
# An existing non-directory at the target path is an error; an existing
# directory is presumably accepted as-is (return line not visible here).
1516 if os.path.exists(file_storage_dir):
1517 if not os.path.isdir(file_storage_dir):
1518 logger.Error("'%s' is not a directory" % file_storage_dir)
1522 os.makedirs(file_storage_dir, 0750)
1523 except OSError, err:
1524 logger.Error("Cannot create file storage directory '%s': %s" %
1525 (file_storage_dir, err))
# RemoveFileStorageDir: validate the path and remove the directory with
# os.rmdir, which only succeeds on an empty directory (deliberate — see
# the inline comment below). Return lines and the 'try:' before rmdir are
# missing from this listing.
1530 def RemoveFileStorageDir(file_storage_dir):
1531 """Remove file storage directory.
1533 Remove it only if it's empty. If not log an error and return.
1536 file_storage_dir: string containing the path
1539 tuple with first element a boolean indicating whether dir
1540 removal was successful or not
1543 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1545 if not file_storage_dir:
1548 if os.path.exists(file_storage_dir):
1549 if not os.path.isdir(file_storage_dir):
1550 logger.Error("'%s' is not a directory" % file_storage_dir)
1552 # deletes dir only if empty, otherwise we want to return False
1554 os.rmdir(file_storage_dir)
1555 except OSError, err:
1556 logger.Error("Cannot remove file storage directory '%s': %s" %
1557 (file_storage_dir, err))
# RenameFileStorageDir: validate both paths against the cluster base dir,
# then rename old -> new only when the destination does not already exist.
# Return lines and the 'try:' before os.rename are missing from this listing.
1562 def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
1563 """Rename the file storage directory.
1566 old_file_storage_dir: string containing the old path
1567 new_file_storage_dir: string containing the new path
1570 tuple with first element a boolean indicating whether dir
1571 rename was successful or not
1574 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
1575 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
1577 if not old_file_storage_dir or not new_file_storage_dir:
1580 if not os.path.exists(new_file_storage_dir):
1581 if os.path.isdir(old_file_storage_dir):
1583 os.rename(old_file_storage_dir, new_file_storage_dir)
1584 except OSError, err:
1585 logger.Error("Cannot rename '%s' to '%s': %s"
1586 % (old_file_storage_dir, new_file_storage_dir, err))
1589 logger.Error("'%s' is not a directory" % old_file_storage_dir)
1592 if os.path.exists(old_file_storage_dir):
# BUG(review): missing parentheses around the format arguments — as written,
# '%' binds only to old_file_storage_dir and new_file_storage_dir is passed
# as a second positional argument to logger.Error, which will raise a
# TypeError at runtime. Should be:
#   % (old_file_storage_dir, new_file_storage_dir))
1593 logger.Error("Cannot rename '%s' to '%s'. Both locations exist." %
1594 old_file_storage_dir, new_file_storage_dir)
# CloseBlockDevices: switch the given disks to secondary mode (DRBD),
# collecting per-device errors into msg. The loop header over 'disks'
# (binding 'cf') and the Close() call are on lines missing from this
# listing. Returns a (success, message) tuple.
1599 def CloseBlockDevices(disks):
1600 """Closes the given block devices.
1602 This means they will be switched to secondary mode (in case of DRBD).
1607 rd = _RecursiveFindBD(cf)
1609 return (False, "Can't find device %s" % cf)
1616 except errors.BlockDeviceError, err:
1617 msg.append(str(err))
# Any accumulated errors make the whole call fail with a joined message.
1619 return (False, "Can't make devices secondary: %s" % ",".join(msg))
1621 return (True, "All devices secondary")
# HooksRunner: executes hook scripts from per-hook-path subdirectories of a
# base directory. Instantiated on the node side (ganeti-noded).
1624 class HooksRunner(object):
1627 This class is instantiated on the node side (ganeti-noded) and not on
# Only filenames matching this mask are eligible hook scripts.
1631 RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1633 def __init__(self, hooks_base_dir=None):
1634 """Constructor for hooks runner.
1637 - hooks_base_dir: if not None, this overrides the
1638 constants.HOOKS_BASE_DIR (useful for unittests)
1641 if hooks_base_dir is None:
1642 hooks_base_dir = constants.HOOKS_BASE_DIR
1643 self._BASE_DIR = hooks_base_dir
# NOTE(review): ExecHook takes no 'self' — presumably decorated with
# @staticmethod on a line missing from this listing; TODO confirm.
1646 def ExecHook(script, env):
1647 """Exec one hook script.
1650 - script: the full path to the script
1651 - env: the environment with which to exec the script
1654 # exec the process using subprocess and log the output
1657 fdstdin = open("/dev/null", "r")
1658 child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1659 stderr=subprocess.STDOUT, close_fds=True,
1660 shell=False, cwd="/", env=env)
# Capture at most 4 KiB of combined stdout/stderr from the hook.
1663 output = child.stdout.read(4096)
1664 child.stdout.close()
1665 except EnvironmentError, err:
1666 output += "Hook script error: %s" % str(err)
1670 result = child.wait()
1672 except EnvironmentError, err:
# Retry the wait on EINTR (loop structure not fully visible here).
1673 if err.errno == errno.EINTR:
1677 # try not to leak fds
1678 for fd in (fdstdin, ):
1682 except EnvironmentError, err:
1683 # just log the error
1684 #logger.Error("While closing fd %s: %s" % (fd, err))
# Success means the hook exited with status 0.
1687 return result == 0, output
1689 def RunHooks(self, hpath, phase, env):
1690 """Run the scripts in the hooks directory.
1692 This method will not be usually overriden by child opcodes.
# Map the phase to a directory suffix (the 'suffix = ...' assignments are
# on lines missing from this listing).
1695 if phase == constants.HOOKS_PHASE_PRE:
1697 elif phase == constants.HOOKS_PHASE_POST:
1700 raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
1703 subdir = "%s-%s.d" % (hpath, suffix)
1704 dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1706 dir_contents = utils.ListVisibleFiles(dir_name)
1707 except OSError, err:
1711 # we use the standard python sort order,
1712 # so 00name is the recommended naming scheme
1714 for relname in dir_contents:
1715 fname = os.path.join(dir_name, relname)
# Skip (HKR_SKIP) anything that is not an executable regular file with a
# mask-conforming name; otherwise run it and record success/failure.
1716 if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1717 self.RE_MASK.match(relname) is not None):
1718 rrval = constants.HKR_SKIP
1721 result, output = self.ExecHook(fname, env)
1723 rrval = constants.HKR_FAIL
1725 rrval = constants.HKR_SUCCESS
1726 rr.append(("%s/%s" % (subdir, relname), rrval, output))
# IAllocatorRunner: locates and runs an instance-allocator script, feeding
# it the request data through a temporary file. Node-side class.
1731 class IAllocatorRunner(object):
1732 """IAllocator runner.
1734 This class is instantiated on the node side (ganeti-noded) and not on
1738 def Run(self, name, idata):
1739 """Run an iallocator script.
1741 Return value: tuple of:
1742 - run status (one of the IARUN_ constants)
1745 - fail reason (as from utils.RunResult)
1748 alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
1750 if alloc_script is None:
1751 return (constants.IARUN_NOTFOUND, None, None, None)
# idata is written into a temp file whose name is passed to the script;
# the write/close and cleanup lines are missing from this listing.
1753 fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
1757 result = utils.RunCmd([alloc_script, fin_name])
1759 return (constants.IARUN_FAILURE, result.stdout, result.stderr,
1764 return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
# DevCacheManager: file-based cache of block device information, one file
# per device path under BDEV_CACHE_DIR. All errors are logged, never raised.
# NOTE(review): UpdateCache/RemoveCache/_ConvertPath take 'cls' — presumably
# decorated with @classmethod on lines missing from this listing.
1767 class DevCacheManager(object):
1768 """Simple class for managing a cache of block device information.
1771 _DEV_PREFIX = "/dev/"
1772 _ROOT_DIR = constants.BDEV_CACHE_DIR
1775 def _ConvertPath(cls, dev_path):
1776 """Converts a /dev/name path to the cache file name.
1778 This replaces slashes with underscores and strips the /dev
1779 prefix. It then returns the full path to the cache file
1782 if dev_path.startswith(cls._DEV_PREFIX):
1783 dev_path = dev_path[len(cls._DEV_PREFIX):]
1784 dev_path = dev_path.replace("/", "_")
# The 'return fpath' line is missing from this listing.
1785 fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1789 def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1790 """Updates the cache information for a given device.
1793 if dev_path is None:
1794 logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1796 fpath = cls._ConvertPath(dev_path)
# 'state' is derived from on_primary on lines missing from this listing;
# a None iv_name is stored as the sentinel "not_visible".
1802 iv_name = "not_visible"
1803 fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1805 utils.WriteFile(fpath, data=fdata)
1806 except EnvironmentError, err:
1807 logger.Error("Can't update bdev cache for %s, error %s" %
1808 (dev_path, str(err)))
1811 def RemoveCache(cls, dev_path):
1812 """Remove data for a dev_path.
1815 if dev_path is None:
1816 logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1818 fpath = cls._ConvertPath(dev_path)
1820 utils.RemoveFile(fpath)
1821 except EnvironmentError, err:
1822 logger.Error("Can't update bdev cache for %s, error %s" %
1823 (dev_path, str(err)))