4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
35 from ganeti import logger
36 from ganeti import errors
37 from ganeti import utils
38 from ganeti import ssh
39 from ganeti import hypervisor
40 from ganeti import constants
41 from ganeti import bdev
42 from ganeti import objects
43 from ganeti import ssconf
# StartMaster (fragment — the def line and several statements are absent from
# this sampled listing): runs the master startup script and logs any failure.
47 """Activate local node as master node.
49 There are two needed steps for this:
50 - run the master script
51 - register the cron script
54 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
# NOTE(review): presumably the missing lines test result.failed before this
# Error call — confirm against the full source.
57 logger.Error("could not activate cluster interface with command %s,"
58 " error: '%s'" % (result.cmd, result.output))
# StopMaster (fragment — def line missing from the sampled listing): runs the
# master stop script and logs any failure.
65 """Deactivate this node as master.
68 - run the master stop script
69 - remove link to master cron script.
72 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
75 logger.Error("could not deactivate cluster interface with command %s,"
76 " error: '%s'" % (result.cmd, result.output))
# Installs the cluster-supplied SSH identity on this node, then restarts sshd.
82 def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
83 """Joins this node to the cluster.
85 This does the following:
86 - updates the hostkeys of the machine (rsa and dsa)
87 - adds the ssh private key to the user
88 - adds the ssh public key to the users' authorized_keys file
# (path, contents, permission-mode) triples for the sshd host key files;
# private keys are written 0600, public keys 0644.
91 sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
92 (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
93 (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
94 (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
95 for name, content, mode in sshd_keys:
96 utils.WriteFile(name, data=content, mode=mode)
# NOTE(review): the remaining arguments of this GetUserFiles call are on a
# source line not present in this sampled listing.
99 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
101 except errors.OpExecError, err:
102 logger.Error("Error while processing user ssh files: %s" % err)
105 for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
106 utils.WriteFile(name, data=content, mode=0600)
108 utils.AddAuthorizedKey(auth_keys, sshpub)
# restart sshd so the newly installed host keys take effect
110 utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
# LeaveCluster (fragment — def line missing): removes local cluster data files
# and this node's ganeti SSH identity.
116 """Cleans up the current node and prepares it to be removed from the cluster.
# remove regular files from the data dir; symlinks and subdirs are kept
119 if os.path.isdir(constants.DATA_DIR):
120 for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
121 full_name = os.path.join(constants.DATA_DIR, rel_name)
122 if os.path.isfile(full_name) and not os.path.islink(full_name):
123 utils.RemoveFile(full_name)
126 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
127 except errors.OpExecError, err:
128 logger.Error("Error while processing ssh files: %s" % err)
# drop our public key from authorized_keys, then delete the key pair itself
131 f = open(pub_key, 'r')
133 utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
137 utils.RemoveFile(priv_key)
138 utils.RemoveFile(pub_key)
# Collects volume-group, hypervisor and boot-id information for this node.
141 def GetNodeInfo(vgname):
142 """Gives back a hash with different informations about the node.
145 { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_dom0': xxx,
146 'memory_free' : xxx, 'memory_total' : xxx }
148 vg_size is the size of the configured volume group in MiB
149 vg_free is the free size of the volume group in MiB
150 memory_dom0 is the memory allocated for domain0 in MiB
151 memory_free is the currently available (free) ram in MiB
152 memory_total is the total number of ram in MiB
156 vginfo = _GetVGInfo(vgname)
157 outputarray['vg_size'] = vginfo['vg_size']
158 outputarray['vg_free'] = vginfo['vg_free']
# merge in whatever the hypervisor reports (memory figures per the docstring)
160 hyper = hypervisor.GetHypervisor()
161 hyp_info = hyper.GetNodeInfo()
162 if hyp_info is not None:
163 outputarray.update(hyp_info)
# the kernel boot_id changes on every boot, letting callers detect reboots
165 f = open("/proc/sys/kernel/random/boot_id", 'r')
167 outputarray["bootid"] = f.read(128).rstrip("\n")
# Runs the subset of node checks requested in `what` and returns the results.
174 def VerifyNode(what):
175 """Verify the status of the local node.
178 what - a dictionary of things to check:
179 'filelist' : list of files for which to compute checksums
180 'nodelist' : list of nodes we should check communication with
181 'hypervisor': run the hypervisor-specific verify
183 Requested files on local node are checksummed and the result returned.
185 The nodelist is traversed, with the following checks being made
187 - known_hosts key correct
188 - correct resolving of node name (target node returns its own hostname
189 by ssh-execution of 'hostname', result compared against name in list.
# each requested check writes its result under the matching key in `result`
194 if 'hypervisor' in what:
195 result['hypervisor'] = hypervisor.GetHypervisor().Verify()
197 if 'filelist' in what:
198 result['filelist'] = utils.FingerprintFiles(what['filelist'])
200 if 'nodelist' in what:
201 result['nodelist'] = {}
202 for node in what['nodelist']:
203 success, message = ssh.VerifyNodeHostname(node)
# NOTE(review): only `message` is stored here; presumably a missing line
# guards on `success` — confirm against the full source.
205 result['nodelist'][node] = message
# Parses `lvs` output into {lv_name: (size, inactive, online)}.
209 def GetVolumeList(vg_name):
210 """Compute list of logical volumes and their size.
213 dictionary of all partitions (key) with their size (in MiB), inactive
215 {'test1': ('20.06', True, True)}
220 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
221 "--separator=%s" % sep,
222 "-olv_name,lv_size,lv_attr", vg_name])
224 logger.Error("Failed to list logical volumes, lvs output: %s" %
228 for line in result.stdout.splitlines():
229 line = line.strip().rstrip(sep)
230 name, size, attr = line.split(sep)
# lv_attr positions: index 4 is the active flag ('-' means inactive),
# index 5 is the open flag ('o' means the device is open/online)
233 inactive = attr[4] == '-'
234 online = attr[5] == 'o'
235 lvs[name] = (size, inactive, online)
# Thin wrapper: delegates volume-group enumeration to the utils module.
240 def ListVolumeGroups():
241 """List the volume groups and their size.
244 Dictionary with keys volume name and values the size of the volume
247 return utils.ListVolumeGroups()
# Fragment (def line missing — presumably NodeVolumes): lists every LV on the
# node as dicts with name/size/first-device/vg keys.
251 """List all volumes on this node.
254 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
256 "--options=lv_name,lv_size,devices,vg_name"])
258 logger.Error("Failed to list logical volumes, lvs output: %s" %
# lvs prints devices as "/dev/xxx(extent)"; keep only the path part
264 return dev.split('(')[0]
270 'name': line[0].strip(),
271 'size': line[1].strip(),
272 'dev': parse_dev(line[2].strip()),
273 'vg': line[3].strip(),
276 return [map_line(line.split('|')) for line in result.stdout.splitlines()]
# True only when every named bridge exists on this node.
279 def BridgesExist(bridges_list):
280 """Check if a list of bridges exist on the current node.
283 True if all of them exist, false otherwise
286 for bridge in bridges_list:
287 if not utils.BridgeExists(bridge):
# Asks the hypervisor for the names of the instances running here.
293 def GetInstanceList():
294 """Provides a list of instances.
297 A list of all running instances on the current node
298 - instance1.example.com
299 - instance2.example.com
303 names = hypervisor.GetHypervisor().ListInstances()
304 except errors.HypervisorError, err:
305 logger.Error("error enumerating instances: %s" % str(err))
# Returns memory/state/cpu-time for one instance, as reported by the
# hypervisor; empty-ish result when the instance is not found.
311 def GetInstanceInfo(instance):
312 """Gives back the informations about an instance as a dictionary.
315 instance: name of the instance (ex. instance1.example.com)
318 { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
320 memory: memory size of instance (int)
321 state: xen state of instance (string)
322 time: cpu time of instance (float)
327 iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
328 if iinfo is not None:
# iinfo tuple layout (indices 2/4/5) matches the hypervisor's
# GetInstanceInfo return — memory, state, cpu time
329 output['memory'] = iinfo[2]
330 output['state'] = iinfo[4]
331 output['time'] = iinfo[5]
# Bulk variant of GetInstanceInfo: one hypervisor call for all instances.
336 def GetAllInstancesInfo():
337 """Gather data about all instances.
339 This is the equivalent of `GetInstanceInfo()`, except that it
340 computes data for all instances at once, thus being faster if one
341 needs data about more than one instance.
343 Returns: a dictionary of dictionaries, keys being the instance name,
345 { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
347 memory: memory size of instance (int)
348 state: xen state of instance (string)
349 time: cpu time of instance (float)
350 vcpus: the number of cpus
355 iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
# loop body building the per-instance dicts is missing from this listing
357 for name, inst_id, memory, vcpus, state, times in iinfo:
# Runs the OS create script against the instance's os and swap block devices,
# logging script output to a per-run file under LOG_OS_DIR.
368 def AddOSToInstance(instance, os_disk, swap_disk):
369 """Add an OS to an instance.
372 instance: the instance object
373 os_disk: the instance-visible name of the os device
374 swap_disk: the instance-visible name of the swap device
377 inst_os = OSFromDisk(instance.os)
379 create_script = inst_os.create_script
# resolve the instance-visible disk names to config disk objects...
381 os_device = instance.FindDisk(os_disk)
382 if os_device is None:
383 logger.Error("Can't find this device-visible name '%s'" % os_disk)
386 swap_device = instance.FindDisk(swap_disk)
387 if swap_device is None:
388 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
# ...and then to the actual assembled block devices on this node
391 real_os_dev = _RecursiveFindBD(os_device)
392 if real_os_dev is None:
393 raise errors.BlockDeviceError("Block device '%s' is not set up" %
397 real_swap_dev = _RecursiveFindBD(swap_device)
398 if real_swap_dev is None:
399 raise errors.BlockDeviceError("Block device '%s' is not set up" %
403 logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
404 instance.name, int(time.time()))
405 if not os.path.exists(constants.LOG_OS_DIR):
406 os.mkdir(constants.LOG_OS_DIR, 0750)
# the create script runs from the OS dir; all its output goes to `logfile`
408 command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
409 inst_os.path, create_script, instance.name,
410 real_os_dev.dev_path, real_swap_dev.dev_path,
413 result = utils.RunCmd(command)
415 logger.Error("os create command '%s' returned error: %s, logfile: %s,"
417 (command, result.fail_reason, logfile, result.output))
# Runs the OS rename script with both the old and the new instance name.
423 def RunRenameInstance(instance, old_name, os_disk, swap_disk):
424 """Run the OS rename script for an instance.
427 instance: the instance object
428 old_name: the old name of the instance
429 os_disk: the instance-visible name of the os device
430 swap_disk: the instance-visible name of the swap device
433 inst_os = OSFromDisk(instance.os)
435 script = inst_os.rename_script
437 os_device = instance.FindDisk(os_disk)
438 if os_device is None:
439 logger.Error("Can't find this device-visible name '%s'" % os_disk)
442 swap_device = instance.FindDisk(swap_disk)
443 if swap_device is None:
444 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
447 real_os_dev = _RecursiveFindBD(os_device)
448 if real_os_dev is None:
449 raise errors.BlockDeviceError("Block device '%s' is not set up" %
453 real_swap_dev = _RecursiveFindBD(swap_device)
454 if real_swap_dev is None:
455 raise errors.BlockDeviceError("Block device '%s' is not set up" %
459 logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
461 instance.name, int(time.time()))
462 if not os.path.exists(constants.LOG_OS_DIR):
463 os.mkdir(constants.LOG_OS_DIR, 0750)
# -o old name, -n new name; output is redirected to the rename logfile
465 command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
466 inst_os.path, script, old_name, instance.name,
467 real_os_dev.dev_path, real_swap_dev.dev_path,
470 result = utils.RunCmd(command)
# NOTE(review): copy/paste defect — this is the *rename* path but the log
# message says "os create command"; should read "os rename command".
473 logger.Error("os create command '%s' returned error: %s"
475 (command, result.fail_reason, result.output))
# Parses `vgs` output for one volume group; all-None dict on any failure.
481 def _GetVGInfo(vg_name):
482 """Get informations about the volume group.
485 vg_name: the volume group
488 { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
490 vg_size is the total size of the volume group in MiB
491 vg_free is the free size of the volume group in MiB
492 pv_count are the number of physical disks in that vg
494 If an error occurs during gathering of data, we return the same dict
495 with keys all set to None.
# start with every key mapped to None so the error paths can return as-is
498 retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
500 retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
501 "--nosuffix", "--units=m", "--separator=:", vg_name])
504 errmsg = "volume group %s not present" % vg_name
# strip the trailing separator before splitting into the three fields
507 valarr = retval.stdout.strip().rstrip(':').split(':')
# sizes are reported as floats in MiB; round to whole MiB integers
511 "vg_size": int(round(float(valarr[0]), 0)),
512 "vg_free": int(round(float(valarr[1]), 0)),
513 "pv_count": int(valarr[2]),
515 except ValueError, err:
516 logger.Error("Fail to parse vgs output: %s" % str(err))
518 logger.Error("vgs output has the wrong number of fields (expected"
519 " three): %s" % str(valarr))
# Maps each config disk of the instance to its already-assembled device.
523 def _GatherBlockDevs(instance):
524 """Set up an instance's block device(s).
526 This is run on the primary node at instance startup. The block
527 devices must be already assembled.
531 for disk in instance.disks:
532 device = _RecursiveFindBD(disk)
# a disk that cannot be found means assembly did not happen — hard error
534 raise errors.BlockDeviceError("Block device '%s' is not set up." %
537 block_devices.append((disk, device))
# Starts an instance via the hypervisor; no-op if it is already running.
541 def StartInstance(instance, extra_args):
542 """Start an instance.
545 instance - name of instance to start.
548 running_instances = GetInstanceList()
# already running: nothing to do
550 if instance.name in running_instances:
553 block_devices = _GatherBlockDevs(instance)
554 hyper = hypervisor.GetHypervisor()
557 hyper.StartInstance(instance, block_devices, extra_args)
558 except errors.HypervisorError, err:
559 logger.Error("Failed to start instance: %s" % err)
# Graceful stop with a ~2 minute wait, then a forced destroy as fallback.
565 def ShutdownInstance(instance):
566 """Shut an instance down.
569 instance - name of instance to shutdown.
572 running_instances = GetInstanceList()
# not running: nothing to shut down
574 if instance.name not in running_instances:
577 hyper = hypervisor.GetHypervisor()
579 hyper.StopInstance(instance)
580 except errors.HypervisorError, err:
581 logger.Error("Failed to stop instance: %s" % err)
584 # test every 10secs for 2min
588 for dummy in range(11):
589 if instance.name not in GetInstanceList():
593 # the shutdown did not succeed
594 logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
597 hyper.StopInstance(instance, force=True)
598 except errors.HypervisorError, err:
599 logger.Error("Failed to stop instance: %s" % err)
603 if instance.name in GetInstanceList():
# NOTE(review): defect — the format string has a '%s' placeholder but no
# "% instance.name" argument, so the literal '%s' is logged.
604 logger.Error("could not shutdown instance '%s' even by destroy")
# Soft reboot via the hypervisor, or hard reboot as stop+start.
610 def RebootInstance(instance, reboot_type, extra_args):
611 """Reboot an instance.
614 instance - name of instance to reboot
615 reboot_type - how to reboot [soft,hard,full]
618 running_instances = GetInstanceList()
620 if instance.name not in running_instances:
621 logger.Error("Cannot reboot instance that is not running")
624 hyper = hypervisor.GetHypervisor()
625 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
627 hyper.RebootInstance(instance)
628 except errors.HypervisorError, err:
629 logger.Error("Failed to soft reboot instance: %s" % err)
# hard reboot is implemented as a full stop followed by a fresh start
631 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
633 ShutdownInstance(instance)
634 StartInstance(instance, extra_args)
635 except errors.HypervisorError, err:
636 logger.Error("Failed to hard reboot instance: %s" % err)
# any other reboot_type (including 'full') is rejected here
639 raise errors.ParameterError("reboot_type invalid")
# Recursively creates a disk's children, then the device itself, assembling
# and opening it as the node role (primary/secondary) requires.
645 def CreateBlockDevice(disk, size, owner, on_primary, info):
646 """Creates a block device for an instance.
649 disk: a ganeti.objects.Disk object
650 size: the size of the physical underlying devices
651 on_primary: if the device should be `Assemble()`-d and
652 `Open()`-ed after creation
655 the new unique_id of the device (this can sometime be
656 computed only after creation), or None. On secondary nodes,
657 it's not required to return anything.
662 for child in disk.children:
663 crdev = _RecursiveAssembleBD(child, owner, on_primary)
664 if on_primary or disk.AssembleOnSecondary():
665 # we need the children open in case the device itself has to
# if the device already exists, remove it first before re-creating
672 device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
673 if device is not None:
674 logger.Info("removing existing device %s" % disk)
676 except errors.BlockDeviceError, err:
679 device = bdev.Create(disk.dev_type, disk.physical_id,
682 raise ValueError("Can't create child device for %s, %s" %
684 if on_primary or disk.AssembleOnSecondary():
685 if not device.Assemble():
686 errorstring = "Can't assemble device after creation"
687 logger.Error(errorstring)
688 raise errors.BlockDeviceError("%s, very unusual event - check the node"
689 " daemon logs" % errorstring)
690 device.SetSyncSpeed(constants.SYNC_SPEED)
# only open read/write on the primary (or when the disk type allows it)
691 if on_primary or disk.OpenOnSecondary():
692 device.Open(force=True)
# record the device in the dev cache, keyed by its /dev path
693 DevCacheManager.UpdateCache(device.dev_path, owner,
694 on_primary, disk.iv_name)
698 physical_id = device.unique_id
# Removes a device and, recursively, all of its children.
702 def RemoveBlockDevice(disk):
703 """Remove a block device.
705 This is intended to be called recursively.
709 # since we are removing the device, allow a partial match
710 # this allows removal of broken mirrors
711 rdev = _RecursiveFindBD(disk, allow_partial=True)
712 except errors.BlockDeviceError, err:
713 # probably can't attach
714 logger.Info("Can't attach to device %s in remove" % disk)
# capture the path before Remove() invalidates the device
717 r_path = rdev.dev_path
718 result = rdev.Remove()
720 DevCacheManager.RemoveCache(r_path)
# a single failed child removal makes the whole result False
724 for child in disk.children:
725 result = result and RemoveBlockDevice(child)
# Bottom-up assembly: children first (tolerating up to `mcn` failures for
# degraded mirrors), then the device itself.
729 def _RecursiveAssembleBD(disk, owner, as_primary):
730 """Activate a block device for an instance.
732 This is run on the primary and secondary nodes for an instance.
734 This function is called recursively.
737 disk: a objects.Disk object
738 as_primary: if we should make the block device read/write
741 the assembled device or None (in case no device was assembled)
743 If the assembly is not successful, an exception is raised.
# mcn = how many children may fail to assemble and still leave the
# device usable (e.g. one leg of a mirror)
748 mcn = disk.ChildrenNeeded()
750 mcn = 0 # max number of Nones allowed
752 mcn = len(disk.children) - mcn # max number of Nones
753 for chld_disk in disk.children:
755 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
756 except errors.BlockDeviceError, err:
# too many failed children: give up (re-raise happens on a missing line)
757 if children.count(None) >= mcn:
760 logger.Debug("Error in child activation: %s" % str(err))
761 children.append(cdev)
763 if as_primary or disk.AssembleOnSecondary():
764 r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
765 r_dev.SetSyncSpeed(constants.SYNC_SPEED)
767 if as_primary or disk.OpenOnSecondary():
771 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
772 as_primary, disk.iv_name)
# Public wrapper: assembles and converts the device object to its /dev path.
779 def AssembleBlockDevice(disk, owner, as_primary):
780 """Activate a block device for an instance.
782 This is a wrapper over _RecursiveAssembleBD.
785 a /dev path for primary nodes
786 True for secondary nodes
789 result = _RecursiveAssembleBD(disk, owner, as_primary)
790 if isinstance(result, bdev.BlockDev):
791 result = result.dev_path
# Shuts down a device if it can be attached, then its children.
795 def ShutdownBlockDevice(disk):
796 """Shut down a block device.
798 First, if the device is assembled (can `Attach()`), then the device
799 is shutdown. Then the children of the device are shutdown.
801 This function is called recursively. Note that we don't cache the
802 children or such, as oppossed to assemble, shutdown of different
803 devices doesn't require that the upper device was active.
806 r_dev = _RecursiveFindBD(disk)
807 if r_dev is not None:
# capture path before shutdown so the cache entry can be dropped
808 r_path = r_dev.dev_path
809 result = r_dev.Shutdown()
811 DevCacheManager.RemoveCache(r_path)
815 for child in disk.children:
816 result = result and ShutdownBlockDevice(child)
# Attaches new child devices to an (possibly degraded) mirror device.
820 def MirrorAddChildren(parent_cdev, new_cdevs):
821 """Extend a mirrored block device.
# partial match allowed: the parent may currently be a broken mirror
824 parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
825 if parent_bdev is None:
826 logger.Error("Can't find parent device")
828 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
# every requested child must resolve to an actual device
829 if new_bdevs.count(None) > 0:
830 logger.Error("Can't find new device(s) to add: %s:%s" %
831 (new_bdevs, new_cdevs))
833 parent_bdev.AddChildren(new_bdevs)
# Detaches child devices from a mirror, resolving each to a /dev path first.
837 def MirrorRemoveChildren(parent_cdev, new_cdevs):
838 """Shrink a mirrored block device.
841 parent_bdev = _RecursiveFindBD(parent_cdev)
842 if parent_bdev is None:
843 logger.Error("Can't find parent in remove children: %s" % parent_cdev)
# prefer the disk's static path; fall back to finding the live device
846 for disk in new_cdevs:
847 rpath = disk.StaticDevPath()
849 bd = _RecursiveFindBD(disk)
851 logger.Error("Can't find dynamic device %s while removing children" %
855 devs.append(bd.dev_path)
858 parent_bdev.RemoveChildren(devs)
# Collects the combined sync status of each disk in the list.
862 def GetMirrorStatus(disks):
863 """Get the mirroring status of a list of devices.
866 disks: list of `objects.Disk`
869 list of (mirror_done, estimated_time) tuples, which
870 are the result of bdev.BlockDevice.CombinedSyncStatus()
875 rbd = _RecursiveFindBD(dsk)
# unlike the lookup helpers, an unfindable device is fatal here
877 raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
878 stats.append(rbd.CombinedSyncStatus())
# Resolves a config disk (children first) to the live bdev object, or None.
882 def _RecursiveFindBD(disk, allow_partial=False):
883 """Check if a device is activated.
885 If so, return informations about the real device.
888 disk: the objects.Disk instance
889 allow_partial: don't abort the find if a child of the
890 device can't be found; this is intended to be
891 used when repairing mirrors
894 None if the device can't be found
895 otherwise the device instance
900 for chdisk in disk.children:
901 children.append(_RecursiveFindBD(chdisk))
903 return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
# Public lookup: returns (path, major, minor) plus the sync-status tuple.
906 def FindBlockDevice(disk):
907 """Check if a device is activated.
909 If so, return informations about the real device.
912 disk: the objects.Disk instance
914 None if the device can't be found
915 (device_path, major, minor, sync_percent, estimated_time, is_degraded)
918 rbd = _RecursiveFindBD(disk)
921 return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
# Atomically replaces a whitelisted config file with master-provided content.
924 def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
925 """Write a file to the filesystem.
927 This allows the master to overwrite(!) a file. It will only perform
928 the operation if the file belongs to a list of configuration files.
931 if not os.path.isabs(file_name):
932 logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
# only a fixed whitelist of cluster config files may be overwritten
937 constants.CLUSTER_CONF_FILE,
939 constants.SSH_KNOWN_HOSTS_FILE,
941 allowed_files.extend(ssconf.SimpleStore().GetFileList())
942 if file_name not in allowed_files:
943 logger.Error("Filename passed to UploadFile not in allowed"
944 " upload targets: '%s'" % file_name)
# write to a temp file in the same directory, then rename into place so
# readers never see a partially-written config file
947 dir_name, small_name = os.path.split(file_name)
948 fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
949 # here we need to make sure we remove the temp file, if any error
952 os.chown(new_name, uid, gid)
953 os.chmod(new_name, mode)
956 os.utime(new_name, (atime, mtime))
957 os.rename(new_name, file_name)
# error path: drop the temp file so no .new litter remains
960 utils.RemoveFile(new_name)
# Renders an EnvironmentError as an EXXXX code name when errno is available.
964 def _ErrnoOrStr(err):
965 """Format an EnvironmentError exception.
967 If the `err` argument has an errno attribute, it will be looked up
968 and converted into a textual EXXXX description. Otherwise the string
969 representation of the error will be returned.
972 if hasattr(err, 'errno'):
973 detail = errno.errorcode[err.errno]
# Finds the first search-path directory containing a subdir named `name`.
979 def _OSSearch(name, search_path=None):
980 """Search for OSes with the given name in the search_path.
983 name: The name of the OS to look for
984 search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)
987 The base_dir the OS resides in
990 if search_path is None:
991 search_path = constants.OS_SEARCH_PATH
993 for dir_name in search_path:
994 t_os_dir = os.path.sep.join([dir_name, name])
995 if os.path.isdir(t_os_dir):
# Reads and validates the integer in an OS dir's ganeti_api_version file.
1001 def _OSOndiskVersion(name, os_dir):
1002 """Compute and return the API version of a given OS.
1004 This function will try to read the API version of the os given by
1005 the 'name' parameter and residing in the 'os_dir' directory.
1007 Return value will be either an integer denoting the version or None in the
1008 case when this is not a valid OS name.
1011 api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1014 st = os.stat(api_file)
1015 except EnvironmentError, err:
1016 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1017 " found (%s)" % _ErrnoOrStr(err))
# must be a plain regular file, not a symlink target / special file
1019 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1020 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
# read at most 256 bytes — the version is a short integer string
1026 api_version = f.read(256)
1029 except EnvironmentError, err:
1030 raise errors.InvalidOS(name, os_dir, "error while reading the"
1031 " API version (%s)" % _ErrnoOrStr(err))
1033 api_version = api_version.strip()
1035 api_version = int(api_version)
1036 except (TypeError, ValueError), err:
1037 raise errors.InvalidOS(name, os_dir,
1038 "API version is not integer (%s)" % str(err))
# Builds an OS object (valid or invalid) for every OS found on disk.
1043 def DiagnoseOS(top_dirs=None):
1044 """Compute the validity for all OSes.
1046 Returns an OS object for each name in all the given top directories
1047 (if not given defaults to constants.OS_SEARCH_PATH)
1053 if top_dirs is None:
1054 top_dirs = constants.OS_SEARCH_PATH
1057 for dir_name in top_dirs:
1058 if os.path.isdir(dir_name):
1060 f_names = utils.ListVisibleFiles(dir_name)
1061 except EnvironmentError, err:
# an unreadable directory is logged and skipped, not fatal
1062 logger.Error("Can't list the OS directory %s: %s" %
1063 (dir_name, str(err)))
1065 for name in f_names:
1067 os_inst = OSFromDisk(name, base_dir=dir_name)
1068 result.append(os_inst)
1069 except errors.InvalidOS, err:
# invalid OSes are still reported, wrapped as a special OS object
1070 result.append(objects.OS.FromInvalidOS(err))
# Loads and fully validates one OS definition from disk, raising InvalidOS
# with a specific reason on any problem.
1075 def OSFromDisk(name, base_dir=None):
1076 """Create an OS instance from disk.
1078 This function will return an OS instance if the given name is a
1079 valid OS name. Otherwise, it will raise an appropriate
1080 `errors.InvalidOS` exception, detailing why this is not a valid
1084 base_dir: Directory containing the OS scripts. Defaults to a search
1085 in all the OS_SEARCH_PATH directories.
1089 if base_dir is None:
1090 base_dir = _OSSearch(name)
1092 if base_dir is None:
1093 raise errors.InvalidOS(name, None, "OS dir not found in search path")
1095 os_dir = os.path.sep.join([base_dir, name])
1096 api_version = _OSOndiskVersion(name, os_dir)
# the node only supports exactly the current API version
1098 if api_version != constants.OS_API_VERSION:
1099 raise errors.InvalidOS(name, os_dir, "API version mismatch"
1100 " (found %s want %s)"
1101 % (api_version, constants.OS_API_VERSION))
1103 # OS Scripts dictionary, we will populate it with the actual script names
1104 os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1106 for script in os_scripts:
1107 os_scripts[script] = os.path.sep.join([os_dir, script])
1110 st = os.stat(os_scripts[script])
1111 except EnvironmentError, err:
1112 raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1113 (script, _ErrnoOrStr(err)))
# every script must be executable by its owner...
1115 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1116 raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
# ...and must be a regular file
1119 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1120 raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1124 return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1125 create_script=os_scripts['create'],
1126 export_script=os_scripts['export'],
1127 import_script=os_scripts['import'],
1128 rename_script=os_scripts['rename'],
1129 api_version=api_version)
# Recurses down a disk tree until it reaches an LVM leaf, then snapshots it.
1132 def SnapshotBlockDevice(disk):
1133 """Create a snapshot copy of a block device.
1135 This function is called recursively, and the snapshot is actually created
1136 just for the leaf lvm backend device.
1139 disk: the disk to be snapshotted
1142 a config entry for the actual lvm device snapshotted.
1146 if len(disk.children) == 1:
1147 # only one child, let's recurse on it
1148 return SnapshotBlockDevice(disk.children[0])
1150 # more than one child, choose one that matches
# with several children (a mirror), snapshot a child of the same size
1151 for child in disk.children:
1152 if child.size == disk.size:
1153 # return implies breaking the loop
1154 return SnapshotBlockDevice(child)
1155 elif disk.dev_type == constants.LD_LV:
1156 r_dev = _RecursiveFindBD(disk)
1157 if r_dev is not None:
1158 # let's stay on the safe side and ask for the full size, for now
1159 return r_dev.Snapshot(disk.size)
# anything that is neither recursable nor an LV cannot be snapshotted
1163 raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
1164 " '%s' of type '%s'" %
1165 (disk.unique_id, disk.dev_type))
# Pipes "export script | compressor | ssh to dest node" to ship a snapshot.
1168 def ExportSnapshot(disk, dest_node, instance):
1169 """Export a block device snapshot to a remote node.
1172 disk: the snapshot block device
1173 dest_node: the node to send the image to
1174 instance: instance being exported
1177 True if successful, False otherwise.
1180 inst_os = OSFromDisk(instance.os)
1181 export_script = inst_os.export_script
1183 logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1184 instance.name, int(time.time()))
1185 if not os.path.exists(constants.LOG_OS_DIR):
1186 os.mkdir(constants.LOG_OS_DIR, 0750)
1188 real_os_dev = _RecursiveFindBD(disk)
1189 if real_os_dev is None:
1190 raise errors.BlockDeviceError("Block device '%s' is not set up" %
# export is written on the destination into <EXPORT_DIR>/<name>.new first
1194 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1195 destfile = disk.physical_id[1]
1197 # the target command is built out of three individual commands,
1198 # which are joined by pipes; we check each individual command for
# export script reads the device and writes the image to stdout;
# its diagnostics go to the logfile
1201 expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1202 export_script, instance.name,
1203 real_os_dev.dev_path, logfile)
# on the destination: create the dir and stream stdin into the dump file
1207 destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1208 destdir, destdir, destfile)
1209 remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)
1213 # all commands have been checked, so we're safe to combine them
1214 command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1216 result = utils.RunCmd(command)
1219 logger.Error("os snapshot export command '%s' returned error: %s"
1221 (command, result.fail_reason, result.output))
# Writes the export's INI metadata file and renames <name>.new to <name>.
1227 def FinalizeExport(instance, snap_disks):
1228 """Write out the export configuration information.
1231 instance: instance configuration
1232 snap_disks: snapshot block devices
1235 False in case of error, True otherwise.
1238 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1239 finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
1241 config = objects.SerializableConfigParser()
# export-level section: format version, timestamp, origin, OS, compression
1243 config.add_section(constants.INISECT_EXP)
1244 config.set(constants.INISECT_EXP, 'version', '0')
1245 config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
1246 config.set(constants.INISECT_EXP, 'source', instance.primary_node)
1247 config.set(constants.INISECT_EXP, 'os', instance.os)
1248 config.set(constants.INISECT_EXP, 'compression', 'gzip')
# instance-level section: resources plus per-NIC and per-disk entries
1250 config.add_section(constants.INISECT_INS)
1251 config.set(constants.INISECT_INS, 'name', instance.name)
1252 config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
1253 config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
1254 config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
1255 for nic_count, nic in enumerate(instance.nics):
1256 config.set(constants.INISECT_INS, 'nic%d_mac' %
1257 nic_count, '%s' % nic.mac)
1258 config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
1259 config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
1260 # TODO: redundant: on load can read nics until it doesn't exist
# NOTE(review): enumerate starts at 0, so 'nic_count' stores the index of
# the LAST nic (count - 1), not the number of nics; same for 'disk_count'
# below — confirm the importer compensates before changing this.
1261 config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)
1263 for disk_count, disk in enumerate(snap_disks):
1264 config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
1265 ('%s' % disk.iv_name))
1266 config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
1267 ('%s' % disk.physical_id[1]))
1268 config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
1270 config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)
1272 cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
1273 cfo = open(cff, 'w')
# atomically activate the export: drop any stale dir, move .new into place
1279 shutil.rmtree(finaldestdir, True)
1280 shutil.move(destdir, finaldestdir)
# Loads an export's metadata file and checks both required sections exist.
1285 def ExportInfo(dest):
1286 """Get export configuration information.
1289 dest: directory containing the export
1292 A serializable config file containing the export info.
1295 cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
1297 config = objects.SerializableConfigParser()
# both the export and instance sections must be present for a valid export
1300 if (not config.has_section(constants.INISECT_EXP) or
1301 not config.has_section(constants.INISECT_INS)):
# Pipes "ssh cat remote image | decompressor | OS import script" into the
# instance's os/swap devices.
1307 def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1308 """Import an os image into an instance.
1311 instance: the instance object
1312 os_disk: the instance-visible name of the os device
1313 swap_disk: the instance-visible name of the swap device
1314 src_node: node holding the source image
1315 src_image: path to the source image on src_node
1318 False in case of error, True otherwise.
1321 inst_os = OSFromDisk(instance.os)
1322 import_script = inst_os.import_script
1324 os_device = instance.FindDisk(os_disk)
1325 if os_device is None:
1326 logger.Error("Can't find this device-visible name '%s'" % os_disk)
1329 swap_device = instance.FindDisk(swap_disk)
1330 if swap_device is None:
1331 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1334 real_os_dev = _RecursiveFindBD(os_device)
1335 if real_os_dev is None:
1336 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1340 real_swap_dev = _RecursiveFindBD(swap_device)
1341 if real_swap_dev is None:
1342 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1344 real_swap_dev.Open()
1346 logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1347 instance.name, int(time.time()))
1348 if not os.path.exists(constants.LOG_OS_DIR):
1349 os.mkdir(constants.LOG_OS_DIR, 0750)
# read the image on the source node and stream it over ssh
1351 destcmd = utils.BuildShellCmd('cat %s', src_image)
1352 remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)
# import script consumes the (decompressed) stream from stdin
1355 impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1356 inst_os.path, import_script, instance.name,
1357 real_os_dev.dev_path, real_swap_dev.dev_path,
1360 command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1362 result = utils.RunCmd(command)
1365 logger.Error("os import command '%s' returned error: %s"
1367 (command, result.fail_reason, result.output))
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns:
    A list of export names (possibly empty if the export directory
    does not exist yet).

  """
  if os.path.isdir(constants.EXPORT_DIR):
    return utils.ListVisibleFiles(constants.EXPORT_DIR)
  else:
    # no exports have ever been made on this node; callers expect a
    # list, not None
    return []
1383 def RemoveExport(export):
1384 """Remove an existing export from the node.
1387 export: the name of the export to remove
1390 False in case of error, True otherwise.
1393 target = os.path.join(constants.EXPORT_DIR, export)
1395 shutil.rmtree(target)
1396 # TODO: catch some of the relevant exceptions and provide a pretty
1397 # error message if rmtree fails.
1402 def RenameBlockDevices(devlist):
1403 """Rename a list of block devices.
1405 The devlist argument is a list of tuples (disk, new_logical,
1406 new_physical). The return value will be a combined boolean result
1407 (True only if all renames succeeded).
1411 for disk, unique_id in devlist:
1412 dev = _RecursiveFindBD(disk)
1417 old_rpath = dev.dev_path
1418 dev.Rename(unique_id)
1419 new_rpath = dev.dev_path
1420 if old_rpath != new_rpath:
1421 DevCacheManager.RemoveCache(old_rpath)
1422 # FIXME: we should add the new cache information here, like:
1423 # DevCacheManager.UpdateCache(new_rpath, owner, ...)
1424 # but we don't have the owner here - maybe parse from existing
1425 # cache? for now, we only lose lvm data when we rename, which
1426 # is less critical than DRBD or MD
1427 except errors.BlockDeviceError, err:
1428 logger.Error("Can't rename device '%s' to '%s': %s" %
1429 (dev, unique_id, err))
1434 class HooksRunner(object):
1437 This class is instantiated on the node side (ganeti-noded) and not on
1441 RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1443 def __init__(self, hooks_base_dir=None):
1444 """Constructor for hooks runner.
1447 - hooks_base_dir: if not None, this overrides the
1448 constants.HOOKS_BASE_DIR (useful for unittests)
1449 - logs_base_dir: if not None, this overrides the
1450 constants.LOG_HOOKS_DIR (useful for unittests)
1451 - logging: enable or disable logging of script output
1454 if hooks_base_dir is None:
1455 hooks_base_dir = constants.HOOKS_BASE_DIR
1456 self._BASE_DIR = hooks_base_dir
1459 def ExecHook(script, env):
1460 """Exec one hook script.
1464 - script: the full path to the script
1465 - env: the environment with which to exec the script
1468 # exec the process using subprocess and log the output
1471 fdstdin = open("/dev/null", "r")
1472 child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1473 stderr=subprocess.STDOUT, close_fds=True,
1474 shell=False, cwd="/", env=env)
1477 output = child.stdout.read(4096)
1478 child.stdout.close()
1479 except EnvironmentError, err:
1480 output += "Hook script error: %s" % str(err)
1484 result = child.wait()
1486 except EnvironmentError, err:
1487 if err.errno == errno.EINTR:
1491 # try not to leak fds
1492 for fd in (fdstdin, ):
1496 except EnvironmentError, err:
1497 # just log the error
1498 #logger.Error("While closing fd %s: %s" % (fd, err))
1501 return result == 0, output
1503 def RunHooks(self, hpath, phase, env):
1504 """Run the scripts in the hooks directory.
1506 This method will not be usually overriden by child opcodes.
1509 if phase == constants.HOOKS_PHASE_PRE:
1511 elif phase == constants.HOOKS_PHASE_POST:
1514 raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
1517 subdir = "%s-%s.d" % (hpath, suffix)
1518 dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1520 dir_contents = utils.ListVisibleFiles(dir_name)
1521 except OSError, err:
1525 # we use the standard python sort order,
1526 # so 00name is the recommended naming scheme
1528 for relname in dir_contents:
1529 fname = os.path.join(dir_name, relname)
1530 if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1531 self.RE_MASK.match(relname) is not None):
1532 rrval = constants.HKR_SKIP
1535 result, output = self.ExecHook(fname, env)
1537 rrval = constants.HKR_FAIL
1539 rrval = constants.HKR_SUCCESS
1540 rr.append(("%s/%s" % (subdir, relname), rrval, output))
1545 class DevCacheManager(object):
1546 """Simple class for managing a chache of block device information.
1549 _DEV_PREFIX = "/dev/"
1550 _ROOT_DIR = constants.BDEV_CACHE_DIR
1553 def _ConvertPath(cls, dev_path):
1554 """Converts a /dev/name path to the cache file name.
1556 This replaces slashes with underscores and strips the /dev
1557 prefix. It then returns the full path to the cache file
1560 if dev_path.startswith(cls._DEV_PREFIX):
1561 dev_path = dev_path[len(cls._DEV_PREFIX):]
1562 dev_path = dev_path.replace("/", "_")
1563 fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1567 def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1568 """Updates the cache information for a given device.
1571 if dev_path is None:
1572 logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1574 fpath = cls._ConvertPath(dev_path)
1580 iv_name = "not_visible"
1581 fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1583 utils.WriteFile(fpath, data=fdata)
1584 except EnvironmentError, err:
1585 logger.Error("Can't update bdev cache for %s, error %s" %
1586 (dev_path, str(err)))
1589 def RemoveCache(cls, dev_path):
1590 """Remove data for a dev_path.
1593 if dev_path is None:
1594 logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1596 fpath = cls._ConvertPath(dev_path)
1598 utils.RemoveFile(fpath)
1599 except EnvironmentError, err:
1600 logger.Error("Can't update bdev cache for %s, error %s" %
1601 (dev_path, str(err)))