4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
35 from ganeti import logger
36 from ganeti import errors
37 from ganeti import utils
38 from ganeti import ssh
39 from ganeti import hypervisor
40 from ganeti import constants
41 from ganeti import bdev
42 from ganeti import objects
43 from ganeti import ssconf
47 """Activate local node as master node.
49 There are two needed steps for this:
50 - run the master script
51 - register the cron script
54 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
57 logger.Error("could not activate cluster interface with command %s,"
58 " error: '%s'" % (result.cmd, result.output))
65 """Deactivate this node as master.
68 - run the master stop script
69 - remove link to master cron script.
72 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
75 logger.Error("could not deactivate cluster interface with command %s,"
76 " error: '%s'" % (result.cmd, result.output))
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
  """Joins this node to the cluster.

  This does the following:
      - updates the hostkeys of the machine (rsa and dsa)
      - adds the ssh private key to the user
      - adds the ssh public key to the users' authorized_keys file

  """
  # Overwrite the host ssh key files with the cluster-wide ones:
  # private keys mode 0600, public keys 0644.
  sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
               (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
               (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
               (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
  for name, content, mode in sshd_keys:
    utils.WriteFile(name, data=content, mode=mode)

  # Locate the cluster user's ssh key files (call continuation and the
  # enclosing try: are not visible in this chunk).
  priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
  except errors.OpExecError, err:
    logger.Error("Error while processing user ssh files: %s" % err)

  # Install the cluster ssh key pair for the ganeti user.
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
    utils.WriteFile(name, data=content, mode=0600)

  utils.AddAuthorizedKey(auth_keys, sshpub)

  # Restart sshd so the new host keys take effect.
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
116 """Cleans up the current node and prepares it to be removed from the cluster.
119 if os.path.isdir(constants.DATA_DIR):
120 for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
121 full_name = os.path.join(constants.DATA_DIR, rel_name)
122 if os.path.isfile(full_name) and not os.path.islink(full_name):
123 utils.RemoveFile(full_name)
126 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
127 except errors.OpExecError, err:
128 logger.Error("Error while processing ssh files: %s" % err)
131 f = open(pub_key, 'r')
133 utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
137 utils.RemoveFile(priv_key)
138 utils.RemoveFile(pub_key)
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.

    { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }
    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB

  """
  # Volume group stats from LVM.
  vginfo = _GetVGInfo(vgname)
  # NOTE(review): 'outputarray' is used without a visible initialisation —
  # presumably created as an empty dict in code not shown here; confirm.
  outputarray['vg_size'] = vginfo['vg_size']
  outputarray['vg_free'] = vginfo['vg_free']

  # Merge in hypervisor-provided node data (memory figures).
  hyper = hypervisor.GetHypervisor()
  hyp_info = hyper.GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)

  # The kernel boot id changes on every reboot; useful for detecting
  # node restarts.
  f = open("/proc/sys/kernel/random/boot_id", 'r')
  outputarray["bootid"] = f.read(128).rstrip("\n")
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  """
  # NOTE(review): 'result' is used without a visible initialisation —
  # presumably an empty dict created in code not shown here; confirm.
  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    result['nodelist'] = {}
    for node in what['nodelist']:
      # 'message' is recorded per node; 'success' is presumably consulted
      # by a guard not visible here — TODO confirm.
      success, message = ssh.VerifyNodeHostname(node)
      result['nodelist'][node] = message
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status, e.g.:
    {'test1': ('20.06', True, True)}

  """
  # NOTE(review): 'sep' and 'lvs' are referenced without a visible
  # definition — presumably a field-separator string and an empty result
  # dict created in code not shown here; confirm.
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  # Error path (continuation of this statement not visible here).
  logger.Error("Failed to list logical volumes, lvs output: %s" %
  for line in result.stdout.splitlines():
    line = line.strip().rstrip(sep)
    name, size, attr = line.split(sep)
    # lv_attr flags: per the variable names, position 4 marks an inactive
    # volume ('-') and position 5 an open/online one ('o').
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  # Thin wrapper: the actual work is delegated to utils.
  return utils.ListVolumeGroups()
251 """List all volumes on this node.
254 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
256 "--options=lv_name,lv_size,devices,vg_name"])
258 logger.Error("Failed to list logical volumes, lvs output: %s" %
264 return dev.split('(')[0]
270 'name': line[0].strip(),
271 'size': line[1].strip(),
272 'dev': parse_dev(line[2].strip()),
273 'vg': line[3].strip(),
276 return [map_line(line.split('|')) for line in result.stdout.splitlines()]
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Returns:
    True if all of them exist, false otherwise

  """
  for bridge in bridges_list:
    # Presumably returns False here when a bridge is missing (the guarded
    # statement is not visible in this chunk) — TODO confirm.
    if not utils.BridgeExists(bridge):
def GetInstanceList():
  """Provides a list of instances.

  Returns:
    A list of all running instances on the current node
    - instance1.example.com
    - instance2.example.com

  """
  # Enclosing try: is not visible in this chunk.
  names = hypervisor.GetHypervisor().ListInstances()
  except errors.HypervisorError, err:
    logger.Error("error enumerating instances: %s" % str(err))
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)

  """
  # NOTE(review): 'output' is used without a visible initialisation —
  # presumably an empty dict created in code not shown here; confirm.
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is not None:
    # Hypervisor info tuple: index 2 = memory, 4 = state, 5 = cpu time.
    output['memory'] = iinfo[2]
    output['state'] = iinfo[4]
    output['time'] = iinfo[5]
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  # Presumably builds the per-instance result dict from these fields
  # (the loop body is not visible in this chunk) — TODO confirm.
  for name, inst_id, memory, vcpus, state, times in iinfo:
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an OS to an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  """
  inst_os = OSFromDisk(instance.os)

  create_script = inst_os.create_script

  # Resolve the instance-visible disk names to config disk objects.
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)

  # Resolve config disks to actual (assembled) block devices; the raise
  # statements' continuation lines are not visible in this chunk.
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %

  # Per-creation log file named after os/instance/timestamp.
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # Run the OS create script with os/swap device paths, redirecting all
  # its output into the log file.
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,

  result = utils.RunCmd(command)
  # Presumably only reached on result.failed — TODO confirm guard.
  logger.Error("os create command '%s' returned error: %s, logfile: %s,"
               (command, result.fail_reason, logfile, result.output))
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
  """Run the OS rename script for an instance.

  Args:
    instance: the instance object
    old_name: the old name of the instance
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device

  """
  inst_os = OSFromDisk(instance.os)

  script = inst_os.rename_script

  # Resolve the instance-visible disk names to config disk objects.
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)

  # Resolve config disks to actual block devices; the raise statements'
  # continuation lines are not visible in this chunk.
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %

  # Log file for the rename run (format has more placeholders than the
  # visible arguments — one argument line is not shown in this chunk).
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                           instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # Run the rename script with old (-o) and new (-n) names.
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
                                inst_os.path, script, old_name, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,

  result = utils.RunCmd(command)

  # NOTE(review): message says "os create" but this runs the rename
  # script — the wording looks copy-pasted from AddOSToInstance.
  # Presumably only reached on result.failed — TODO confirm guard.
  logger.Error("os create command '%s' returned error: %s"
               (command, result.fail_reason, result.output))
def _GetVGInfo(vg_name):
  """Get informations about the volume group.

  Args:
    vg_name: the volume group

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
    where
    vg_size is the total size of the volume group in MiB
    vg_free is the free size of the volume group in MiB
    pv_count are the number of physical disks in that vg

  """
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  # Presumably raised only when retval.failed — the guard is not visible
  # in this chunk; TODO confirm.
  errmsg = "volume group %s not present" % vg_name
  raise errors.LVMError(errmsg)
  # Parse the single ':'-separated output line; sizes reported by vgs are
  # floats and get rounded to whole MiB.
  valarr = retval.stdout.strip().split(':')
    "vg_size": int(round(float(valarr[0]), 0)),
    "vg_free": int(round(float(valarr[1]), 0)),
    "pv_count": int(valarr[2]),
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  """
  # NOTE(review): 'block_devices' is appended to without a visible
  # initialisation — presumably an empty list; confirm.
  for disk in instance.disks:
    device = _RecursiveFindBD(disk)
      # The raise continuation and its guard are not visible here.
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
    block_devices.append((disk, device))
def StartInstance(instance, extra_args):
  """Start an instance.

  Args:
    instance - name of instance to start.

  """
  running_instances = GetInstanceList()

  # Instance already running: the guarded early-exit statement is not
  # visible in this chunk.
  if instance.name in running_instances:

  block_devices = _GatherBlockDevs(instance)
  hyper = hypervisor.GetHypervisor()

  # Enclosing try: is not visible in this chunk.
  hyper.StartInstance(instance, block_devices, extra_args)
  except errors.HypervisorError, err:
    logger.Error("Failed to start instance: %s" % err)
def ShutdownInstance(instance):
  """Shut an instance down.

  Args:
    instance - name of instance to shutdown.

  """
  running_instances = GetInstanceList()

  # Instance not running: the guarded early-exit statement is not visible
  # in this chunk.
  if instance.name not in running_instances:

  # First attempt: graceful stop via the hypervisor (enclosing try: not
  # visible here).
  hyper = hypervisor.GetHypervisor()
  hyper.StopInstance(instance)
  except errors.HypervisorError, err:
    logger.Error("Failed to stop instance: %s" % err)

  # test every 10secs for 2min
  for dummy in range(11):
    if instance.name not in GetInstanceList():

  # the shutdown did not succeed
  logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)

  # Second attempt: forced destroy (enclosing try: not visible here).
  hyper.StopInstance(instance, force=True)
  except errors.HypervisorError, err:
    logger.Error("Failed to stop instance: %s" % err)

  if instance.name in GetInstanceList():
    # NOTE(review): format string contains '%s' but no argument is
    # applied — looks like a bug; "... % instance.name" was probably
    # intended.
    logger.Error("could not shutdown instance '%s' even by destroy")
def RebootInstance(instance, reboot_type, extra_args):
  """Reboot an instance.

  Args:
    instance    - name of instance to reboot
    reboot_type - how to reboot [soft,hard,full]

  """
  running_instances = GetInstanceList()

  if instance.name not in running_instances:
    logger.Error("Cannot reboot instance that is not running")

  hyper = hypervisor.GetHypervisor()
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
    # Soft reboot: ask the hypervisor to reboot in place (enclosing try:
    # not visible here).
    hyper.RebootInstance(instance)
    except errors.HypervisorError, err:
      logger.Error("Failed to soft reboot instance: %s" % err)
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
    # Hard reboot: full stop + start cycle (enclosing try: not visible
    # here).
    ShutdownInstance(instance)
    StartInstance(instance, extra_args)
    except errors.HypervisorError, err:
      logger.Error("Failed to hard reboot instance: %s" % err)
    # Any other reboot type is rejected.
    raise errors.ParameterError("reboot_type invalid")
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  Args:
    bdev: a ganeti.objects.Disk object
    size: the size of the physical underlying devices
    do_open: if the device should be `Assemble()`-d and
             `Open()`-ed after creation

  Returns:
    the new unique_id of the device (this can sometime be
    computed only after creation), or None. On secondary nodes,
    it's not required to return anything.

  NOTE(review): the documented argument names (bdev, do_open) do not
  match the actual signature (disk, size, owner, on_primary, info).

  """
  # Recursively assemble the children first, so the parent device can be
  # built on top of them.
  for child in disk.children:
    crdev = _RecursiveAssembleBD(child, owner, on_primary)
    if on_primary or disk.AssembleOnSecondary():
      # we need the children open in case the device itself has to
      # be assembled

  # NOTE(review): 'clist' has no visible definition here — presumably the
  # list of assembled child devices; confirm.
  device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
  if device is not None:
    # Re-creating an existing device: remove it first (enclosing try: not
    # visible here).
    logger.Info("removing existing device %s" % disk)
    except errors.BlockDeviceError, err:

  # Create the device proper (call continuation not visible here).
  device = bdev.Create(disk.dev_type, disk.physical_id,
    raise ValueError("Can't create child device for %s, %s" %
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logger.Error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    # Open read/write on the primary, or if the disk type requires it on
    # secondaries too.
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  physical_id = device.unique_id
def RemoveBlockDevice(disk):
  """Remove a block device.

  This is intended to be called recursively.

  """
  # since we are removing the device, allow a partial match
  # this allows removal of broken mirrors
  # (enclosing try: not visible in this chunk)
  rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError, err:
    # probably can't attach
    logger.Info("Can't attach to device %s in remove" % disk)
  r_path = rdev.dev_path
  result = rdev.Remove()
  # Drop the cached device state once the device is gone.
  DevCacheManager.RemoveCache(r_path)
  # Recurse into the children; overall result is the AND of all removals.
  for child in disk.children:
    result = result and RemoveBlockDevice(child)
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  This function is called recursively.

  Args:
    disk: a objects.Disk object
    as_primary: if we should make the block device read/write

  Returns:
    the assembled device or None (in case no device was assembled)

  If the assembly is not successful, an exception is raised.

  """
  # NOTE(review): the three 'mcn' assignments below belong to different
  # branches whose if/else lines are not visible in this chunk; they
  # compute how many failed (None) children are tolerable. 'children' is
  # also used without a visible initialisation — presumably an empty
  # list; confirm.
  mcn = disk.ChildrenNeeded()
  mcn = 0 # max number of Nones allowed
  mcn = len(disk.children) - mcn # max number of Nones
  for chld_disk in disk.children:
    # Enclosing try: not visible in this chunk.
    cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
    except errors.BlockDeviceError, err:
      if children.count(None) >= mcn:
      # Too few failures to abort: log and continue with a None child.
      logger.Debug("Error in child activation: %s" % str(err))
    children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    # Open read/write on the primary, or if the disk type requires it.
    if as_primary or disk.OpenOnSecondary():
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  result = _RecursiveAssembleBD(disk, owner, as_primary)
  # On the primary the recursive call yields a device object; callers
  # get its /dev path instead.
  if isinstance(result, bdev.BlockDev):
    result = result.dev_path
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
    # Drop the cached device state once the device is down.
    DevCacheManager.RemoveCache(r_path)
  # Recurse into the children; overall result is the AND of all shutdowns.
  for child in disk.children:
    result = result and ShutdownBlockDevice(child)
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  """
  # Partial match allowed: the parent mirror may be degraded.
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  # All new devices must have been found, otherwise bail out.
  if new_bdevs.count(None) > 0:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
  parent_bdev.AddChildren(new_bdevs)
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
  # NOTE(review): 'devs' is appended to without a visible initialisation —
  # presumably an empty list of device paths to detach; confirm.
  for disk in new_cdevs:
    # Prefer the disk's static /dev path; fall back to dynamic lookup.
    rpath = disk.StaticDevPath()
    bd = _RecursiveFindBD(disk)
      # Error path (guard and continuation not visible in this chunk).
      logger.Error("Can't find dynamic device %s while removing children" %
    devs.append(bd.dev_path)
  parent_bdev.RemoveChildren(devs)
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  """
  # NOTE(review): 'stats' and the loop variable 'dsk' come from code not
  # visible here — presumably "stats = []" and "for dsk in disks:";
  # confirm.
  rbd = _RecursiveFindBD(dsk)
    # The guard for this raise is not visible in this chunk.
    raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
  stats.append(rbd.CombinedSyncStatus())
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  # NOTE(review): 'children' is appended to without a visible
  # initialisation — presumably an empty list; confirm.
  for chdisk in disk.children:
    children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  rbd = _RecursiveFindBD(disk)
  # Presumably returns None early when rbd is None (guard not visible in
  # this chunk) — TODO confirm.
  return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  """
  # Error path (continuation of this statement not visible here).
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %

  # Whitelist of files the master is allowed to push to this node.
  allowed_files = [constants.CLUSTER_CONF_FILE, "/etc/hosts",
                   constants.SSH_KNOWN_HOSTS_FILE]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)

  # Write to a temp file in the same directory, then rename into place so
  # the replacement of the target file is atomic.
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # happens while writing it
  os.chown(new_name, uid, gid)
  os.chmod(new_name, mode)
  os.utime(new_name, (atime, mtime))
  os.rename(new_name, file_name)
  # Presumably a cleanup path (e.g. in a finally:/except:) removing the
  # temp file — TODO confirm.
  utils.RemoveFile(new_name)
def _ErrnoOrStr(err):
  """Format an EnvironmentError exception.

  If the `err` argument has an errno attribute, it will be looked up
  and converted into a textual EXXXX description. Otherwise the string
  representation of the error will be returned.

  """
  # Map the numeric errno to its symbolic name (e.g. 2 -> 'ENOENT'); the
  # else branch and return are not visible in this chunk.
  if hasattr(err, 'errno'):
    detail = errno.errorcode[err.errno]
def _OSSearch(name, search_path=None):
  """Search for OSes with the given name in the search_path.

  Args:
    name: The name of the OS to look for
    search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)

  Returns:
    The base_dir the OS resides in

  """
  if search_path is None:
    search_path = constants.OS_SEARCH_PATH

  # Return the first directory in the path that contains a subdirectory
  # named after the OS (the guarded statement is not visible here).
  for dir_name in search_path:
    t_os_dir = os.path.sep.join([dir_name, name])
    if os.path.isdir(t_os_dir):
def _OSOndiskVersion(name, os_dir):
  """Compute and return the API version of a given OS.

  This function will try to read the API version of the os given by
  the 'name' parameter and residing in the 'os_dir' directory.

  Return value will be either an integer denoting the version or None in the
  case when this is not a valid OS name.

  """
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])

  # Enclosing try: not visible in this chunk.
  st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))

  # The version file must be a regular file (continuation of the raise is
  # not visible here).
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"

  # Read the version contents (the open() and enclosing try: are not
  # visible in this chunk).
  api_version = f.read(256)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))

  # Parse the version into an integer; reject anything non-numeric.
  api_version = api_version.strip()
  api_version = int(api_version)
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, os_dir,
                           "API version is not integer (%s)" % str(err))
def DiagnoseOS(top_dirs=None):
  """Compute the validity for all OSes.

  Returns an OS object for each name in all the given top directories
  (if not given defaults to constants.OS_SEARCH_PATH)

  """
  if top_dirs is None:
    top_dirs = constants.OS_SEARCH_PATH

  # NOTE(review): 'result' is appended to without a visible
  # initialisation — presumably an empty list; confirm.
  for dir_name in top_dirs:
    if os.path.isdir(dir_name):
      # Enclosing try: not visible in this chunk.
      f_names = utils.ListVisibleFiles(dir_name)
      except EnvironmentError, err:
        logger.Error("Can't list the OS directory %s: %s" %
                     (dir_name, str(err)))
      for name in f_names:
        # Invalid OSes are reported as objects too, not silently skipped.
        os_inst = OSFromDisk(name, base_dir=dir_name)
        result.append(os_inst)
        except errors.InvalidOS, err:
          result.append(objects.OS.FromInvalidOS(err))
def OSFromDisk(name, base_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  OS.

  Args:
    os_dir: Directory containing the OS scripts. Defaults to a search
            in all the OS_SEARCH_PATH directories.

  """
  if base_dir is None:
    base_dir = _OSSearch(name)

  if base_dir is None:
    raise errors.InvalidOS(name, None, "OS dir not found in search path")

  os_dir = os.path.sep.join([base_dir, name])
  api_version = _OSOndiskVersion(name, os_dir)

  if api_version != constants.OS_API_VERSION:
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
                           " (found %s want %s)"
                           % (api_version, constants.OS_API_VERSION))

  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}

  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])

    # Enclosing try: not visible in this chunk.
    st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
                             (script, _ErrnoOrStr(err)))

    # Every script must be executable by its owner... (the raise
    # continuations below are not visible in this chunk)
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %

    # ...and a regular file.
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %

  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    rename_script=os_scripts['rename'],
                    api_version=api_version)
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted.

  """
  # NOTE(review): the branch structure here is partially not visible; the
  # outer condition selecting between the children/LV cases appears to be
  # missing from this chunk.
  if len(disk.children) == 1:
    # only one child, let's recurse on it
    return SnapshotBlockDevice(disk.children[0])
    # more than one child, choose one that matches
    for child in disk.children:
      if child.size == disk.size:
        # return implies breaking the loop
        return SnapshotBlockDevice(child)
  elif disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)

  # Only lvm-backed leaves can be snapshotted.
  raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                               " '%s' of type '%s'" %
                               (disk.unique_id, disk.dev_type))
def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.

  Args:
    disk: the snapshot block device
    dest_node: the node to send the image to
    instance: instance being exported

  Returns:
    True if successful, False otherwise.

  """
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script

  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # The raise continuation below is not visible in this chunk.
  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %

  # Exports land in a ".new" staging dir until finalized.
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]

  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  # valid parameters
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)

  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)

  # all commands have been checked, so we're safe to combine them
  # NOTE(review): 'comprcmd' has no visible definition in this chunk —
  # presumably the compression step of the pipeline; confirm.
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(command)

  # Presumably only reached on result.failed — TODO confirm guard.
  logger.Error("os snapshot export command '%s' returned error: %s"
               (command, result.fail_reason, result.output))
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  # Staging dir (written by ExportSnapshot) and its final location.
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  # Export-level metadata.
  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  # Instance-level metadata.
  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
  # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE(review): enumerate leaves nic_count/disk_count at the LAST index,
  # not the number of items — off by one if a consumer treats these as
  # counts; confirm against the import side.
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    # The disk size argument's continuation is not visible in this chunk.
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  # Write the config file into the staging dir.
  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')

  # Replace any previous export for this instance with the new one.
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info.

  """
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  # NOTE(review): the config is never read from 'cff' in the visible
  # code — the loading step is presumably in code not shown here; confirm.
  # Both mandatory sections must be present for a valid export (guarded
  # statement not visible in this chunk).
  if (not config.has_section(constants.INISECT_EXP) or
      not config.has_section(constants.INISECT_INS)):
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  """
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  # Resolve the instance-visible disk names to config disk objects.
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)

  # Resolve config disks to actual block devices; the raise statements'
  # continuation lines are not visible in this chunk.
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  real_swap_dev.Open()

  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # Build a pipeline: remote cat of the image | decompress | import script.
  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)

  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,

  # NOTE(review): 'comprcmd' has no visible definition in this chunk —
  # presumably the decompression step of the pipeline; confirm.
  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)

  # Presumably only reached on result.failed — TODO confirm guard.
  logger.Error("os import command '%s' returned error: %s"
               (command, result.fail_reason, result.output))
  """Return a list of exports currently available on this machine.
  # Exports are stored one-per-subdirectory under EXPORT_DIR; hidden
  # entries are filtered out by utils.ListVisibleFiles.
  if os.path.isdir(constants.EXPORT_DIR):
    return utils.ListVisibleFiles(constants.EXPORT_DIR)
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  target = os.path.join(constants.EXPORT_DIR, export)
  # NOTE(review): `export` is joined verbatim under EXPORT_DIR; a name
  # containing "/" or ".." could escape the directory — confirm that
  # callers validate export names before reaching this point.
  shutil.rmtree(target)
  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  The devlist argument is a list of tuples (disk, new_logical,
  new_physical). The return value will be a combined boolean result
  (True only if all renames succeeded).

  """
  # NOTE(review): the loop below unpacks two-element tuples
  # (disk, unique_id) while the docstring describes three-element tuples;
  # presumably unique_id carries (new_logical, new_physical) — confirm
  # against callers.
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
    # NOTE(review): a `dev is None` guard and the `try:` opening the
    # except-clause below are not visible in this fragment.
    old_rpath = dev.dev_path
    dev.Rename(unique_id)
    # Renaming may change the device path; drop the stale cache entry so
    # the bdev cache stays consistent.
    new_rpath = dev.dev_path
    if old_rpath != new_rpath:
      DevCacheManager.RemoveCache(old_rpath)
      # FIXME: we should add the new cache information here, like:
      # DevCacheManager.UpdateCache(new_rpath, owner, ...)
      # but we don't have the owner here - maybe parse from existing
      # cache? for now, we only lose lvm data when we rename, which
      # is less critical than DRBD or MD
    except errors.BlockDeviceError, err:
      logger.Error("Can't rename device '%s' to '%s': %s" %
                   (dev, unique_id, err))
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side; it executes hook scripts found under the configured
  hooks base directory.

  """
  # Only hook files whose basename matches this mask are executed;
  # anything else is reported as skipped by RunHooks.
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)
      - logs_base_dir: if not None, this overrides the
        constants.LOG_HOOKS_DIR (useful for unittests)
      - logging: enable or disable logging of script output

    """
    # NOTE(review): logs_base_dir and logging are documented above but no
    # matching parameters or attributes are visible in this fragment.
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  def ExecHook(script, env):
    """Exec one hook script.

    Args:
      - script: the full path to the script
      - env: the environment with which to exec the script

    """
    # NOTE(review): no `self` parameter — presumably a @staticmethod
    # decorator precedes this def in a line not visible here; confirm.
    # exec the process using subprocess and log the output
    fdstdin = open("/dev/null", "r")
    child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT, close_fds=True,
                             shell=False, cwd="/",env=env)
    # Only the first 4kB of the script's combined stdout/stderr is kept.
    output = child.stdout.read(4096)
    child.stdout.close()
    except EnvironmentError, err:
      output += "Hook script error: %s" % str(err)

    result = child.wait()
    except EnvironmentError, err:
      if err.errno == errno.EINTR:
    # try not to leak fds
    for fd in (fdstdin, ):
      except EnvironmentError, err:
        # just log the error
        #logger.Error("While closing fd %s: %s" % (fd, err))
    # Exit status 0 counts as success; the captured output is returned
    # for logging/reporting by the caller.
    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    """
    # Map the phase to a directory suffix ("pre"/"post"); the `suffix`
    # assignments themselves are not visible in this fragment.
    if phase == constants.HOOKS_PHASE_PRE:
    elif phase == constants.HOOKS_PHASE_POST:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)

    # Hook scripts live in <base>/<hpath>-<suffix>.d/.
    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:
    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # Skip entries that are not regular executable files or whose name
      # does not match RE_MASK.
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
              self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP
        result, output = self.ExecHook(fname, env)
        rrval = constants.HKR_FAIL
        rrval = constants.HKR_SUCCESS
      rr.append(("%s/%s" % (subdir, relname), rrval, output))
1529 class DevCacheManager(object):
1530 """Simple class for managing a chache of block device information.
1533 _DEV_PREFIX = "/dev/"
1534 _ROOT_DIR = constants.BDEV_CACHE_DIR
1537 def _ConvertPath(cls, dev_path):
1538 """Converts a /dev/name path to the cache file name.
1540 This replaces slashes with underscores and strips the /dev
1541 prefix. It then returns the full path to the cache file
1544 if dev_path.startswith(cls._DEV_PREFIX):
1545 dev_path = dev_path[len(cls._DEV_PREFIX):]
1546 dev_path = dev_path.replace("/", "_")
1547 fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1551 def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1552 """Updates the cache information for a given device.
1555 if dev_path is None:
1556 logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1558 fpath = cls._ConvertPath(dev_path)
1564 iv_name = "not_visible"
1565 fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1567 utils.WriteFile(fpath, data=fdata)
1568 except EnvironmentError, err:
1569 logger.Error("Can't update bdev cache for %s, error %s" %
1570 (dev_path, str(err)))
1573 def RemoveCache(cls, dev_path):
1574 """Remove data for a dev_path.
1577 if dev_path is None:
1578 logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1580 fpath = cls._ConvertPath(dev_path)
1582 utils.RemoveFile(fpath)
1583 except EnvironmentError, err:
1584 logger.Error("Can't update bdev cache for %s, error %s" %
1585 (dev_path, str(err)))