4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
import os
import os.path
import shutil
import time
import stat
import errno
import re
import random

from ganeti import logger
from ganeti import errors
from ganeti import utils
from ganeti import ssh
from ganeti import hypervisor
from ganeti import constants
from ganeti import bdev
from ganeti import objects
from ganeti import ssconf
47 return ssh.SshRunner()
51 """Activate local node as master node.
53 There are two needed steps for this:
54 - run the master script
55 - register the cron script
58 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
61 logger.Error("could not activate cluster interface with command %s,"
62 " error: '%s'" % (result.cmd, result.output))
69 """Deactivate this node as master.
71 This runs the master stop script.
74 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
77 logger.Error("could not deactivate cluster interface with command %s,"
78 " error: '%s'" % (result.cmd, result.output))
84 def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
85 """Joins this node to the cluster.
87 This does the following:
88 - updates the hostkeys of the machine (rsa and dsa)
89 - adds the ssh private key to the user
90 - adds the ssh public key to the users' authorized_keys file
93 sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
94 (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
95 (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
96 (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
97 for name, content, mode in sshd_keys:
98 utils.WriteFile(name, data=content, mode=mode)
101 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
103 except errors.OpExecError, err:
104 logger.Error("Error while processing user ssh files: %s" % err)
107 for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
108 utils.WriteFile(name, data=content, mode=0600)
110 utils.AddAuthorizedKey(auth_keys, sshpub)
112 utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
118 """Cleans up the current node and prepares it to be removed from the cluster.
121 if os.path.isdir(constants.DATA_DIR):
122 for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
123 full_name = os.path.join(constants.DATA_DIR, rel_name)
124 if os.path.isfile(full_name) and not os.path.islink(full_name):
125 utils.RemoveFile(full_name)
128 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
129 except errors.OpExecError, err:
130 logger.Error("Error while processing ssh files: %s" % err)
133 f = open(pub_key, 'r')
135 utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
139 utils.RemoveFile(priv_key)
140 utils.RemoveFile(pub_key)
142 # Return a reassuring string to the caller, and quit
143 raise errors.QuitGanetiException(False, 'Shutdown scheduled')
146 def GetNodeInfo(vgname):
147 """Gives back a hash with different informations about the node.
150 { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx,
151 'memory_free' : xxx, 'memory_total' : xxx }
153 vg_size is the size of the configured volume group in MiB
154 vg_free is the free size of the volume group in MiB
155 memory_dom0 is the memory allocated for domain0 in MiB
156 memory_free is the currently available (free) ram in MiB
157 memory_total is the total number of ram in MiB
161 vginfo = _GetVGInfo(vgname)
162 outputarray['vg_size'] = vginfo['vg_size']
163 outputarray['vg_free'] = vginfo['vg_free']
165 hyper = hypervisor.GetHypervisor()
166 hyp_info = hyper.GetNodeInfo()
167 if hyp_info is not None:
168 outputarray.update(hyp_info)
170 f = open("/proc/sys/kernel/random/boot_id", 'r')
172 outputarray["bootid"] = f.read(128).rstrip("\n")
179 def VerifyNode(what):
180 """Verify the status of the local node.
183 what - a dictionary of things to check:
184 'filelist' : list of files for which to compute checksums
185 'nodelist' : list of nodes we should check communication with
186 'hypervisor': run the hypervisor-specific verify
188 Requested files on local node are checksummed and the result returned.
190 The nodelist is traversed, with the following checks being made
192 - known_hosts key correct
193 - correct resolving of node name (target node returns its own hostname
194 by ssh-execution of 'hostname', result compared against name in list.
199 if 'hypervisor' in what:
200 result['hypervisor'] = hypervisor.GetHypervisor().Verify()
202 if 'filelist' in what:
203 result['filelist'] = utils.FingerprintFiles(what['filelist'])
205 if 'nodelist' in what:
206 result['nodelist'] = {}
207 random.shuffle(what['nodelist'])
208 for node in what['nodelist']:
209 success, message = _GetSshRunner().VerifyNodeHostname(node)
211 result['nodelist'][node] = message
212 if 'node-net-test' in what:
213 result['node-net-test'] = {}
214 my_name = utils.HostInfo().name
215 my_pip = my_sip = None
216 for name, pip, sip in what['node-net-test']:
222 result['node-net-test'][my_name] = ("Can't find my own"
223 " primary/secondary IP"
226 port = ssconf.SimpleStore().GetNodeDaemonPort()
227 for name, pip, sip in what['node-net-test']:
229 if not utils.TcpPing(pip, port, source=my_pip):
230 fail.append("primary")
232 if not utils.TcpPing(sip, port, source=my_sip):
233 fail.append("secondary")
235 result['node-net-test'][name] = ("failure using the %s"
242 def GetVolumeList(vg_name):
243 """Compute list of logical volumes and their size.
246 dictionary of all partions (key) with their size (in MiB), inactive
248 {'test1': ('20.06', True, True)}
253 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
254 "--separator=%s" % sep,
255 "-olv_name,lv_size,lv_attr", vg_name])
257 logger.Error("Failed to list logical volumes, lvs output: %s" %
261 for line in result.stdout.splitlines():
262 line = line.strip().rstrip(sep)
263 name, size, attr = line.split(sep)
266 inactive = attr[4] == '-'
267 online = attr[5] == 'o'
268 lvs[name] = (size, inactive, online)
273 def ListVolumeGroups():
274 """List the volume groups and their size.
277 Dictionary with keys volume name and values the size of the volume
280 return utils.ListVolumeGroups()
284 """List all volumes on this node.
287 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
289 "--options=lv_name,lv_size,devices,vg_name"])
291 logger.Error("Failed to list logical volumes, lvs output: %s" %
297 return dev.split('(')[0]
303 'name': line[0].strip(),
304 'size': line[1].strip(),
305 'dev': parse_dev(line[2].strip()),
306 'vg': line[3].strip(),
309 return [map_line(line.split('|')) for line in result.stdout.splitlines()]
312 def BridgesExist(bridges_list):
313 """Check if a list of bridges exist on the current node.
316 True if all of them exist, false otherwise
319 for bridge in bridges_list:
320 if not utils.BridgeExists(bridge):
326 def GetInstanceList():
327 """Provides a list of instances.
330 A list of all running instances on the current node
331 - instance1.example.com
332 - instance2.example.com
336 names = hypervisor.GetHypervisor().ListInstances()
337 except errors.HypervisorError, err:
338 logger.Error("error enumerating instances: %s" % str(err))
344 def GetInstanceInfo(instance):
345 """Gives back the informations about an instance as a dictionary.
348 instance: name of the instance (ex. instance1.example.com)
351 { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
353 memory: memory size of instance (int)
354 state: xen state of instance (string)
355 time: cpu time of instance (float)
360 iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
361 if iinfo is not None:
362 output['memory'] = iinfo[2]
363 output['state'] = iinfo[4]
364 output['time'] = iinfo[5]
369 def GetAllInstancesInfo():
370 """Gather data about all instances.
372 This is the equivalent of `GetInstanceInfo()`, except that it
373 computes data for all instances at once, thus being faster if one
374 needs data about more than one instance.
376 Returns: a dictionary of dictionaries, keys being the instance name,
378 { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
380 memory: memory size of instance (int)
381 state: xen state of instance (string)
382 time: cpu time of instance (float)
383 vcpus: the number of cpus
388 iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
390 for name, inst_id, memory, vcpus, state, times in iinfo:
401 def AddOSToInstance(instance, os_disk, swap_disk):
402 """Add an OS to an instance.
405 instance: the instance object
406 os_disk: the instance-visible name of the os device
407 swap_disk: the instance-visible name of the swap device
410 inst_os = OSFromDisk(instance.os)
412 create_script = inst_os.create_script
414 os_device = instance.FindDisk(os_disk)
415 if os_device is None:
416 logger.Error("Can't find this device-visible name '%s'" % os_disk)
419 swap_device = instance.FindDisk(swap_disk)
420 if swap_device is None:
421 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
424 real_os_dev = _RecursiveFindBD(os_device)
425 if real_os_dev is None:
426 raise errors.BlockDeviceError("Block device '%s' is not set up" %
430 real_swap_dev = _RecursiveFindBD(swap_device)
431 if real_swap_dev is None:
432 raise errors.BlockDeviceError("Block device '%s' is not set up" %
436 logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
437 instance.name, int(time.time()))
438 if not os.path.exists(constants.LOG_OS_DIR):
439 os.mkdir(constants.LOG_OS_DIR, 0750)
441 command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
442 inst_os.path, create_script, instance.name,
443 real_os_dev.dev_path, real_swap_dev.dev_path,
446 result = utils.RunCmd(command)
448 logger.Error("os create command '%s' returned error: %s, logfile: %s,"
450 (command, result.fail_reason, logfile, result.output))
456 def RunRenameInstance(instance, old_name, os_disk, swap_disk):
457 """Run the OS rename script for an instance.
460 instance: the instance object
461 old_name: the old name of the instance
462 os_disk: the instance-visible name of the os device
463 swap_disk: the instance-visible name of the swap device
466 inst_os = OSFromDisk(instance.os)
468 script = inst_os.rename_script
470 os_device = instance.FindDisk(os_disk)
471 if os_device is None:
472 logger.Error("Can't find this device-visible name '%s'" % os_disk)
475 swap_device = instance.FindDisk(swap_disk)
476 if swap_device is None:
477 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
480 real_os_dev = _RecursiveFindBD(os_device)
481 if real_os_dev is None:
482 raise errors.BlockDeviceError("Block device '%s' is not set up" %
486 real_swap_dev = _RecursiveFindBD(swap_device)
487 if real_swap_dev is None:
488 raise errors.BlockDeviceError("Block device '%s' is not set up" %
492 logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
494 instance.name, int(time.time()))
495 if not os.path.exists(constants.LOG_OS_DIR):
496 os.mkdir(constants.LOG_OS_DIR, 0750)
498 command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
499 inst_os.path, script, old_name, instance.name,
500 real_os_dev.dev_path, real_swap_dev.dev_path,
503 result = utils.RunCmd(command)
506 logger.Error("os create command '%s' returned error: %s"
508 (command, result.fail_reason, result.output))
514 def _GetVGInfo(vg_name):
515 """Get informations about the volume group.
518 vg_name: the volume group
521 { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
523 vg_size is the total size of the volume group in MiB
524 vg_free is the free size of the volume group in MiB
525 pv_count are the number of physical disks in that vg
527 If an error occurs during gathering of data, we return the same dict
528 with keys all set to None.
531 retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
533 retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
534 "--nosuffix", "--units=m", "--separator=:", vg_name])
537 errmsg = "volume group %s not present" % vg_name
540 valarr = retval.stdout.strip().rstrip(':').split(':')
544 "vg_size": int(round(float(valarr[0]), 0)),
545 "vg_free": int(round(float(valarr[1]), 0)),
546 "pv_count": int(valarr[2]),
548 except ValueError, err:
549 logger.Error("Fail to parse vgs output: %s" % str(err))
551 logger.Error("vgs output has the wrong number of fields (expected"
552 " three): %s" % str(valarr))
556 def _GatherBlockDevs(instance):
557 """Set up an instance's block device(s).
559 This is run on the primary node at instance startup. The block
560 devices must be already assembled.
564 for disk in instance.disks:
565 device = _RecursiveFindBD(disk)
567 raise errors.BlockDeviceError("Block device '%s' is not set up." %
570 block_devices.append((disk, device))
574 def StartInstance(instance, extra_args):
575 """Start an instance.
578 instance - name of instance to start.
581 running_instances = GetInstanceList()
583 if instance.name in running_instances:
586 block_devices = _GatherBlockDevs(instance)
587 hyper = hypervisor.GetHypervisor()
590 hyper.StartInstance(instance, block_devices, extra_args)
591 except errors.HypervisorError, err:
592 logger.Error("Failed to start instance: %s" % err)
598 def ShutdownInstance(instance):
599 """Shut an instance down.
602 instance - name of instance to shutdown.
605 running_instances = GetInstanceList()
607 if instance.name not in running_instances:
610 hyper = hypervisor.GetHypervisor()
612 hyper.StopInstance(instance)
613 except errors.HypervisorError, err:
614 logger.Error("Failed to stop instance: %s" % err)
617 # test every 10secs for 2min
621 for dummy in range(11):
622 if instance.name not in GetInstanceList():
626 # the shutdown did not succeed
627 logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
630 hyper.StopInstance(instance, force=True)
631 except errors.HypervisorError, err:
632 logger.Error("Failed to stop instance: %s" % err)
636 if instance.name in GetInstanceList():
637 logger.Error("could not shutdown instance '%s' even by destroy")
643 def RebootInstance(instance, reboot_type, extra_args):
644 """Reboot an instance.
647 instance - name of instance to reboot
648 reboot_type - how to reboot [soft,hard,full]
651 running_instances = GetInstanceList()
653 if instance.name not in running_instances:
654 logger.Error("Cannot reboot instance that is not running")
657 hyper = hypervisor.GetHypervisor()
658 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
660 hyper.RebootInstance(instance)
661 except errors.HypervisorError, err:
662 logger.Error("Failed to soft reboot instance: %s" % err)
664 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
666 ShutdownInstance(instance)
667 StartInstance(instance, extra_args)
668 except errors.HypervisorError, err:
669 logger.Error("Failed to hard reboot instance: %s" % err)
672 raise errors.ParameterError("reboot_type invalid")
678 def MigrateInstance(instance, target, live):
679 """Migrates an instance to another node.
682 hyper = hypervisor.GetHypervisor()
685 hyper.MigrateInstance(instance, target, live)
686 except errors.HypervisorError, err:
687 msg = "Failed to migrate instance: %s" % str(err)
690 return (True, "Migration successfull")
693 def CreateBlockDevice(disk, size, owner, on_primary, info):
694 """Creates a block device for an instance.
697 disk: a ganeti.objects.Disk object
698 size: the size of the physical underlying device
699 owner: a string with the name of the instance
700 on_primary: a boolean indicating if it is the primary node or not
701 info: string that will be sent to the physical device creation
704 the new unique_id of the device (this can sometime be
705 computed only after creation), or None. On secondary nodes,
706 it's not required to return anything.
711 for child in disk.children:
712 crdev = _RecursiveAssembleBD(child, owner, on_primary)
713 if on_primary or disk.AssembleOnSecondary():
714 # we need the children open in case the device itself has to
719 device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
720 if device is not None:
721 logger.Info("removing existing device %s" % disk)
723 except errors.BlockDeviceError, err:
726 device = bdev.Create(disk.dev_type, disk.physical_id,
729 raise ValueError("Can't create child device for %s, %s" %
731 if on_primary or disk.AssembleOnSecondary():
732 if not device.Assemble():
733 errorstring = "Can't assemble device after creation"
734 logger.Error(errorstring)
735 raise errors.BlockDeviceError("%s, very unusual event - check the node"
736 " daemon logs" % errorstring)
737 device.SetSyncSpeed(constants.SYNC_SPEED)
738 if on_primary or disk.OpenOnSecondary():
739 device.Open(force=True)
740 DevCacheManager.UpdateCache(device.dev_path, owner,
741 on_primary, disk.iv_name)
745 physical_id = device.unique_id
749 def RemoveBlockDevice(disk):
750 """Remove a block device.
752 This is intended to be called recursively.
756 # since we are removing the device, allow a partial match
757 # this allows removal of broken mirrors
758 rdev = _RecursiveFindBD(disk, allow_partial=True)
759 except errors.BlockDeviceError, err:
760 # probably can't attach
761 logger.Info("Can't attach to device %s in remove" % disk)
764 r_path = rdev.dev_path
765 result = rdev.Remove()
767 DevCacheManager.RemoveCache(r_path)
771 for child in disk.children:
772 result = result and RemoveBlockDevice(child)
776 def _RecursiveAssembleBD(disk, owner, as_primary):
777 """Activate a block device for an instance.
779 This is run on the primary and secondary nodes for an instance.
781 This function is called recursively.
784 disk: a objects.Disk object
785 as_primary: if we should make the block device read/write
788 the assembled device or None (in case no device was assembled)
790 If the assembly is not successful, an exception is raised.
795 mcn = disk.ChildrenNeeded()
797 mcn = 0 # max number of Nones allowed
799 mcn = len(disk.children) - mcn # max number of Nones
800 for chld_disk in disk.children:
802 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
803 except errors.BlockDeviceError, err:
804 if children.count(None) >= mcn:
807 logger.Debug("Error in child activation: %s" % str(err))
808 children.append(cdev)
810 if as_primary or disk.AssembleOnSecondary():
811 r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
812 r_dev.SetSyncSpeed(constants.SYNC_SPEED)
814 if as_primary or disk.OpenOnSecondary():
816 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
817 as_primary, disk.iv_name)
824 def AssembleBlockDevice(disk, owner, as_primary):
825 """Activate a block device for an instance.
827 This is a wrapper over _RecursiveAssembleBD.
830 a /dev path for primary nodes
831 True for secondary nodes
834 result = _RecursiveAssembleBD(disk, owner, as_primary)
835 if isinstance(result, bdev.BlockDev):
836 result = result.dev_path
840 def ShutdownBlockDevice(disk):
841 """Shut down a block device.
843 First, if the device is assembled (can `Attach()`), then the device
844 is shutdown. Then the children of the device are shutdown.
846 This function is called recursively. Note that we don't cache the
847 children or such, as oppossed to assemble, shutdown of different
848 devices doesn't require that the upper device was active.
851 r_dev = _RecursiveFindBD(disk)
852 if r_dev is not None:
853 r_path = r_dev.dev_path
854 result = r_dev.Shutdown()
856 DevCacheManager.RemoveCache(r_path)
860 for child in disk.children:
861 result = result and ShutdownBlockDevice(child)
865 def MirrorAddChildren(parent_cdev, new_cdevs):
866 """Extend a mirrored block device.
869 parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
870 if parent_bdev is None:
871 logger.Error("Can't find parent device")
873 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
874 if new_bdevs.count(None) > 0:
875 logger.Error("Can't find new device(s) to add: %s:%s" %
876 (new_bdevs, new_cdevs))
878 parent_bdev.AddChildren(new_bdevs)
882 def MirrorRemoveChildren(parent_cdev, new_cdevs):
883 """Shrink a mirrored block device.
886 parent_bdev = _RecursiveFindBD(parent_cdev)
887 if parent_bdev is None:
888 logger.Error("Can't find parent in remove children: %s" % parent_cdev)
891 for disk in new_cdevs:
892 rpath = disk.StaticDevPath()
894 bd = _RecursiveFindBD(disk)
896 logger.Error("Can't find dynamic device %s while removing children" %
900 devs.append(bd.dev_path)
903 parent_bdev.RemoveChildren(devs)
907 def GetMirrorStatus(disks):
908 """Get the mirroring status of a list of devices.
911 disks: list of `objects.Disk`
914 list of (mirror_done, estimated_time) tuples, which
915 are the result of bdev.BlockDevice.CombinedSyncStatus()
920 rbd = _RecursiveFindBD(dsk)
922 raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
923 stats.append(rbd.CombinedSyncStatus())
927 def _RecursiveFindBD(disk, allow_partial=False):
928 """Check if a device is activated.
930 If so, return informations about the real device.
933 disk: the objects.Disk instance
934 allow_partial: don't abort the find if a child of the
935 device can't be found; this is intended to be
936 used when repairing mirrors
939 None if the device can't be found
940 otherwise the device instance
945 for chdisk in disk.children:
946 children.append(_RecursiveFindBD(chdisk))
948 return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
951 def FindBlockDevice(disk):
952 """Check if a device is activated.
954 If so, return informations about the real device.
957 disk: the objects.Disk instance
959 None if the device can't be found
960 (device_path, major, minor, sync_percent, estimated_time, is_degraded)
963 rbd = _RecursiveFindBD(disk)
966 return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
969 def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
970 """Write a file to the filesystem.
972 This allows the master to overwrite(!) a file. It will only perform
973 the operation if the file belongs to a list of configuration files.
976 if not os.path.isabs(file_name):
977 logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
982 constants.CLUSTER_CONF_FILE,
984 constants.SSH_KNOWN_HOSTS_FILE,
986 allowed_files.extend(ssconf.SimpleStore().GetFileList())
987 if file_name not in allowed_files:
988 logger.Error("Filename passed to UploadFile not in allowed"
989 " upload targets: '%s'" % file_name)
992 utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
993 atime=atime, mtime=mtime)
997 def _ErrnoOrStr(err):
998 """Format an EnvironmentError exception.
1000 If the `err` argument has an errno attribute, it will be looked up
1001 and converted into a textual EXXXX description. Otherwise the string
1002 representation of the error will be returned.
1005 if hasattr(err, 'errno'):
1006 detail = errno.errorcode[err.errno]
1012 def _OSOndiskVersion(name, os_dir):
1013 """Compute and return the API version of a given OS.
1015 This function will try to read the API version of the os given by
1016 the 'name' parameter and residing in the 'os_dir' directory.
1018 Return value will be either an integer denoting the version or None in the
1019 case when this is not a valid OS name.
1022 api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1025 st = os.stat(api_file)
1026 except EnvironmentError, err:
1027 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1028 " found (%s)" % _ErrnoOrStr(err))
1030 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1031 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1037 api_version = f.read(256)
1040 except EnvironmentError, err:
1041 raise errors.InvalidOS(name, os_dir, "error while reading the"
1042 " API version (%s)" % _ErrnoOrStr(err))
1044 api_version = api_version.strip()
1046 api_version = int(api_version)
1047 except (TypeError, ValueError), err:
1048 raise errors.InvalidOS(name, os_dir,
1049 "API version is not integer (%s)" % str(err))
1054 def DiagnoseOS(top_dirs=None):
1055 """Compute the validity for all OSes.
1057 Returns an OS object for each name in all the given top directories
1058 (if not given defaults to constants.OS_SEARCH_PATH)
1064 if top_dirs is None:
1065 top_dirs = constants.OS_SEARCH_PATH
1068 for dir_name in top_dirs:
1069 if os.path.isdir(dir_name):
1071 f_names = utils.ListVisibleFiles(dir_name)
1072 except EnvironmentError, err:
1073 logger.Error("Can't list the OS directory %s: %s" %
1074 (dir_name, str(err)))
1076 for name in f_names:
1078 os_inst = OSFromDisk(name, base_dir=dir_name)
1079 result.append(os_inst)
1080 except errors.InvalidOS, err:
1081 result.append(objects.OS.FromInvalidOS(err))
1086 def OSFromDisk(name, base_dir=None):
1087 """Create an OS instance from disk.
1089 This function will return an OS instance if the given name is a
1090 valid OS name. Otherwise, it will raise an appropriate
1091 `errors.InvalidOS` exception, detailing why this is not a valid
1095 os_dir: Directory containing the OS scripts. Defaults to a search
1096 in all the OS_SEARCH_PATH directories.
1100 if base_dir is None:
1101 os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
1103 raise errors.InvalidOS(name, None, "OS dir not found in search path")
1105 os_dir = os.path.sep.join([base_dir, name])
1107 api_version = _OSOndiskVersion(name, os_dir)
1109 if api_version != constants.OS_API_VERSION:
1110 raise errors.InvalidOS(name, os_dir, "API version mismatch"
1111 " (found %s want %s)"
1112 % (api_version, constants.OS_API_VERSION))
1114 # OS Scripts dictionary, we will populate it with the actual script names
1115 os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1117 for script in os_scripts:
1118 os_scripts[script] = os.path.sep.join([os_dir, script])
1121 st = os.stat(os_scripts[script])
1122 except EnvironmentError, err:
1123 raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1124 (script, _ErrnoOrStr(err)))
1126 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1127 raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1130 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1131 raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1135 return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1136 create_script=os_scripts['create'],
1137 export_script=os_scripts['export'],
1138 import_script=os_scripts['import'],
1139 rename_script=os_scripts['rename'],
1140 api_version=api_version)
1143 def GrowBlockDevice(disk, amount):
1144 """Grow a stack of block devices.
1146 This function is called recursively, with the childrens being the
1150 disk: the disk to be grown
1152 Returns: a tuple of (status, result), with:
1153 status: the result (true/false) of the operation
1154 result: the error message if the operation failed, otherwise not used
1157 r_dev = _RecursiveFindBD(disk)
1159 return False, "Cannot find block device %s" % (disk,)
1163 except errors.BlockDeviceError, err:
1164 return False, str(err)
1169 def SnapshotBlockDevice(disk):
1170 """Create a snapshot copy of a block device.
1172 This function is called recursively, and the snapshot is actually created
1173 just for the leaf lvm backend device.
1176 disk: the disk to be snapshotted
1179 a config entry for the actual lvm device snapshotted.
1183 if len(disk.children) == 1:
1184 # only one child, let's recurse on it
1185 return SnapshotBlockDevice(disk.children[0])
1187 # more than one child, choose one that matches
1188 for child in disk.children:
1189 if child.size == disk.size:
1190 # return implies breaking the loop
1191 return SnapshotBlockDevice(child)
1192 elif disk.dev_type == constants.LD_LV:
1193 r_dev = _RecursiveFindBD(disk)
1194 if r_dev is not None:
1195 # let's stay on the safe side and ask for the full size, for now
1196 return r_dev.Snapshot(disk.size)
1200 raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
1201 " '%s' of type '%s'" %
1202 (disk.unique_id, disk.dev_type))
1205 def ExportSnapshot(disk, dest_node, instance):
1206 """Export a block device snapshot to a remote node.
1209 disk: the snapshot block device
1210 dest_node: the node to send the image to
1211 instance: instance being exported
1214 True if successful, False otherwise.
1217 inst_os = OSFromDisk(instance.os)
1218 export_script = inst_os.export_script
1220 logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1221 instance.name, int(time.time()))
1222 if not os.path.exists(constants.LOG_OS_DIR):
1223 os.mkdir(constants.LOG_OS_DIR, 0750)
1225 real_os_dev = _RecursiveFindBD(disk)
1226 if real_os_dev is None:
1227 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1231 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1232 destfile = disk.physical_id[1]
1234 # the target command is built out of three individual commands,
1235 # which are joined by pipes; we check each individual command for
1238 expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1239 export_script, instance.name,
1240 real_os_dev.dev_path, logfile)
1244 destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1245 destdir, destdir, destfile)
1246 remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,
1249 # all commands have been checked, so we're safe to combine them
1250 command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1252 result = utils.RunCmd(command)
1255 logger.Error("os snapshot export command '%s' returned error: %s"
1257 (command, result.fail_reason, result.output))
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  # build the export under a ".new" staging directory, then swap it in
  # over any previous export for this instance at the end
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
  config = objects.SerializableConfigParser()
  # [export] section: metadata about the export itself
  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')
  # [instance] section: the instance parameters needed to re-import it
  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE(review): 'nic_count' stores the *last loop index*, not the number
  # of NICs (one NIC yields 0); also NameError if instance.nics is empty.
  # Confirm the importer compensates for both.
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
  # same last-index-vs-count caveat as 'nic_count' above
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)
  # write the config file into the staging directory
  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  # replace any previous export for this instance with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info.

  """
  # the export metadata lives in a single config file inside the export dir
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
  config = objects.SerializableConfigParser()
  # reject exports that lack either of the two mandatory sections
  # written by FinalizeExport
  if (not config.has_section(constants.INISECT_EXP) or
      not config.has_section(constants.INISECT_INS)):
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  """
  # the OS definition provides the import script to run
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script
  # translate the instance-visible disk names into config disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
  # resolve the config objects to the actual block devices on this node;
  # they must already be assembled
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  real_swap_dev.Open()
  # per-OS, per-instance, timestamped log file for the import run
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)
  # stream the image from the source node over ssh and pipe it through
  # decompression into the OS-specific import script
  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,
  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
  result = utils.RunCmd(command)
  logger.Error("os import command '%s' returned error: %s"
               (command, result.fail_reason, result.output))
  """Return a list of exports currently available on this machine.

  """
  # each export is a subdirectory of EXPORT_DIR named after the instance
  if os.path.isdir(constants.EXPORT_DIR):
    return utils.ListVisibleFiles(constants.EXPORT_DIR)
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  # exports are stored one-per-directory under EXPORT_DIR
  target = os.path.join(constants.EXPORT_DIR, export)
  shutil.rmtree(target)
  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  The devlist argument is a list of tuples (disk, new_logical,
  new_physical). The return value will be a combined boolean result
  (True only if all renames succeeded).

  """
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
      # remember the device path before the rename: it may change, and
      # the device cache is keyed on it
      old_rpath = dev.dev_path
      dev.Rename(unique_id)
      new_rpath = dev.dev_path
      if old_rpath != new_rpath:
        # drop the stale cache entry for the old path
        DevCacheManager.RemoveCache(old_rpath)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError, err:
      logger.Error("Can't rename device '%s' to '%s': %s" %
                   (dev, unique_id, err))
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
  # normalize first so "a/b/../c" style paths can't escape the base dir
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = ssconf.SimpleStore().GetFileStorageDir()
  # NOTE(review): os.path.commonprefix compares character-by-character,
  # not path components, so e.g. "/srv/ganeti2" would pass a base of
  # "/srv/ganeti" -- confirm this is acceptable here
  if (not os.path.commonprefix([file_storage_dir, base_file_storage_dir]) ==
      base_file_storage_dir):
    logger.Error("file storage directory '%s' is not under base file"
                 " storage directory '%s'" %
                 (file_storage_dir, base_file_storage_dir))
  return file_storage_dir
def CreateFileStorageDir(file_storage_dir):
  """Create file storage directory.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    creation was successful or not

  """
  # validate/normalize against the cluster-wide base storage directory;
  # returns a false value when the path is outside the allowed base
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
  if not file_storage_dir:
  if os.path.exists(file_storage_dir):
    if not os.path.isdir(file_storage_dir):
      # something else (file, symlink, ...) already occupies the path
      logger.Error("'%s' is not a directory" % file_storage_dir)
    # create all missing path components with mode 0750
    os.makedirs(file_storage_dir, 0750)
  except OSError, err:
    logger.Error("Cannot create file storage directory '%s': %s" %
                 (file_storage_dir, err))
def RemoveFileStorageDir(file_storage_dir):
  """Remove file storage directory.

  Remove it only if it's empty. If not log an error and return.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    removal was successful or not

  """
  # validate/normalize against the cluster-wide base storage directory
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)
  if not file_storage_dir:
  if os.path.exists(file_storage_dir):
    if not os.path.isdir(file_storage_dir):
      logger.Error("'%s' is not a directory" % file_storage_dir)
    # deletes dir only if empty, otherwise we want to return False
    os.rmdir(file_storage_dir)
  except OSError, err:
    logger.Error("Cannot remove file storage directory '%s': %s" %
                 (file_storage_dir, err))
1561 def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
1562 """Rename the file storage directory.
1565 old_file_storage_dir: string containing the old path
1566 new_file_storage_dir: string containing the new path
1569 tuple with first element a boolean indicating wheter dir
1570 rename was successful or not
1573 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
1574 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
1576 if not old_file_storage_dir or not new_file_storage_dir:
1579 if not os.path.exists(new_file_storage_dir):
1580 if os.path.isdir(old_file_storage_dir):
1582 os.rename(old_file_storage_dir, new_file_storage_dir)
1583 except OSError, err:
1584 logger.Error("Cannot rename '%s' to '%s': %s"
1585 % (old_file_storage_dir, new_file_storage_dir, err))
1588 logger.Error("'%s' is not a directory" % old_file_storage_dir)
1591 if os.path.exists(old_file_storage_dir):
1592 logger.Error("Cannot rename '%s' to '%s'. Both locations exist." %
1593 old_file_storage_dir, new_file_storage_dir)
def CloseBlockDevices(disks):
  """Closes the given block devices.

  This means they will be switched to secondary mode (in case of DRBD).

  """
    # resolve the config disk object to the node-local block device
    rd = _RecursiveFindBD(cf)
      # a device that cannot be found is a hard error
      return (False, "Can't find device %s" % cf)
    except errors.BlockDeviceError, err:
      # collect per-device failures and report them all together below
      msg.append(str(err))
    return (False, "Can't make devices secondary: %s" % ",".join(msg))
  return (True, "All devices secondary")
class HooksRunner(object):
  """Hooks runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only hook scripts whose file name matches this mask are executed;
  # anything else (dotfiles, names with unusual characters) is skipped
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  # NOTE(review): defined without self/cls -- presumably decorated as a
  # staticmethod elsewhere; confirm
  def ExecHook(script, env):
    """Exec one hook script.

    Args:
      - script: the full path to the script
      - env: the environment with which to exec the script

    """
    # exec the process using subprocess and log the output
    fdstdin = open("/dev/null", "r")
    child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT, close_fds=True,
                             shell=False, cwd="/", env=env)
      # capture at most 4 KiB of combined stdout/stderr from the hook
      output = child.stdout.read(4096)
      child.stdout.close()
    except EnvironmentError, err:
      output += "Hook script error: %s" % str(err)
      result = child.wait()
    except EnvironmentError, err:
      # interrupted waits are retried rather than treated as failures
      if err.errno == errno.EINTR:
    # try not to leak fds
    for fd in (fdstdin, ):
      except EnvironmentError, err:
        # just log the error
        #logger.Error("While closing fd %s: %s" % (fd, err))
    # a zero exit code from the hook counts as success
    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    """
    # the phase selects which hooks subdirectory (pre/post) is scanned
    if phase == constants.HOOKS_PHASE_PRE:
    elif phase == constants.HOOKS_PHASE_POST:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
    # hooks live in <base>/<hooks-path>-<phase>.d/
    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # non-files, non-executables and badly-named entries are skipped
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
              self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        result, output = self.ExecHook(fname, env)
          rrval = constants.HKR_FAIL
          rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))
class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    Return value: tuple of:
      - run status (one of the IARUN_ constants)
      - stdout of the script
      - stderr of the script
      - fail reason (as from utils.RunResult)

    """
    # locate the allocator script in the cluster-configured search path
    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)
    # the request data is handed to the script via a temporary file
    # passed as its only argument
    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
      result = utils.RunCmd([alloc_script, fin_name])
      return (constants.IARUN_FAILURE, result.stdout, result.stderr,
    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
class DevCacheManager(object):
  """Simple class for managing a cache of block device information.

  The cache is a set of small files under _ROOT_DIR, one per block
  device, named after the (flattened) device path.

  NOTE(review): the methods take a ``cls`` first argument -- presumably
  decorated as classmethods; confirm.

  """
  # stripped from device paths before building the cache file name
  _DEV_PREFIX = "/dev/"
  _ROOT_DIR = constants.BDEV_CACHE_DIR

  def _ConvertPath(cls, dev_path):
    """Converts a /dev/name path to the cache file name.

    This replaces slashes with underscores and strips the /dev
    prefix. It then returns the full path to the cache file.

    """
    if dev_path.startswith(cls._DEV_PREFIX):
      dev_path = dev_path[len(cls._DEV_PREFIX):]
    # flatten the remaining path into a single file-name component
    dev_path = dev_path.replace("/", "_")
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)

  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
    """Updates the cache information for a given device.

    """
    # cache updates are best-effort: a missing path is only logged
    if dev_path is None:
      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
    fpath = cls._ConvertPath(dev_path)
      # devices with no instance-visible name are marked explicitly
      iv_name = "not_visible"
    # cache file format: "<owner> <state> <iv_name>\n"
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
      utils.WriteFile(fpath, data=fdata)
    except EnvironmentError, err:
      logger.Error("Can't update bdev cache for %s, error %s" %
                   (dev_path, str(err)))

  def RemoveCache(cls, dev_path):
    """Remove data for a dev_path.

    """
    if dev_path is None:
      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
    fpath = cls._ConvertPath(dev_path)
      utils.RemoveFile(fpath)
    except EnvironmentError, err:
      # NOTE(review): message says "update" but this is the remove path --
      # looks copy-pasted from UpdateCache
      logger.Error("Can't update bdev cache for %s, error %s" %
                   (dev_path, str(err)))