4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
import os
import os.path
import shutil
import time
import tempfile
import stat
import errno

from ganeti import logger
from ganeti import errors
from ganeti import utils
from ganeti import ssh
from ganeti import hypervisor
from ganeti import constants
from ganeti import bdev
from ganeti import objects
from ganeti import ssconf
47 """Activate local node as master node.
49 There are two needed steps for this:
50 - run the master script
51 - register the cron script
54 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
57 logger.Error("could not activate cluster interface with command %s,"
58 " error: '%s'" % (result.cmd, result.output))
65 """Deactivate this node as master.
68 - run the master stop script
69 - remove link to master cron script.
72 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
75 logger.Error("could not deactivate cluster interface with command %s,"
76 " error: '%s'" % (result.cmd, result.output))
82 def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
83 """Joins this node to the cluster.
85 This does the following:
86 - updates the hostkeys of the machine (rsa and dsa)
87 - adds the ssh private key to the user
88 - adds the ssh public key to the users' authorized_keys file
91 sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
92 (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
93 (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
94 (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
95 for name, content, mode in sshd_keys:
96 utils.WriteFile(name, data=content, mode=mode)
99 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
101 except errors.OpExecError, err:
102 logger.Error("Error while processing user ssh files: %s" % err)
105 for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
106 utils.WriteFile(name, data=content, mode=0600)
108 utils.AddAuthorizedKey(auth_keys, sshpub)
110 utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
116 """Cleans up the current node and prepares it to be removed from the cluster.
119 if os.path.isdir(constants.DATA_DIR):
120 for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
121 full_name = os.path.join(constants.DATA_DIR, rel_name)
122 if os.path.isfile(full_name) and not os.path.islink(full_name):
123 utils.RemoveFile(full_name)
126 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
127 except errors.OpExecError, err:
128 logger.Error("Error while processing ssh files: %s" % err)
131 f = open(pub_key, 'r')
133 utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
137 utils.RemoveFile(priv_key)
138 utils.RemoveFile(pub_key)
141 def GetNodeInfo(vgname):
142 """Gives back a hash with different informations about the node.
145 { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx,
146 'memory_free' : xxx, 'memory_total' : xxx }
148 vg_size is the size of the configured volume group in MiB
149 vg_free is the free size of the volume group in MiB
150 memory_dom0 is the memory allocated for domain0 in MiB
151 memory_free is the currently available (free) ram in MiB
152 memory_total is the total number of ram in MiB
156 vginfo = _GetVGInfo(vgname)
157 outputarray['vg_size'] = vginfo['vg_size']
158 outputarray['vg_free'] = vginfo['vg_free']
160 hyper = hypervisor.GetHypervisor()
161 hyp_info = hyper.GetNodeInfo()
162 if hyp_info is not None:
163 outputarray.update(hyp_info)
165 f = open("/proc/sys/kernel/random/boot_id", 'r')
167 outputarray["bootid"] = f.read(128).rstrip("\n")
174 def VerifyNode(what):
175 """Verify the status of the local node.
178 what - a dictionary of things to check:
179 'filelist' : list of files for which to compute checksums
180 'nodelist' : list of nodes we should check communication with
181 'hypervisor': run the hypervisor-specific verify
183 Requested files on local node are checksummed and the result returned.
185 The nodelist is traversed, with the following checks being made
187 - known_hosts key correct
188 - correct resolving of node name (target node returns its own hostname
189 by ssh-execution of 'hostname', result compared against name in list.
194 if 'hypervisor' in what:
195 result['hypervisor'] = hypervisor.GetHypervisor().Verify()
197 if 'filelist' in what:
198 result['filelist'] = utils.FingerprintFiles(what['filelist'])
200 if 'nodelist' in what:
201 result['nodelist'] = {}
202 for node in what['nodelist']:
203 success, message = ssh.VerifyNodeHostname(node)
205 result['nodelist'][node] = message
209 def GetVolumeList(vg_name):
210 """Compute list of logical volumes and their size.
213 dictionary of all partions (key) with their size (in MiB), inactive
215 {'test1': ('20.06', True, True)}
220 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
221 "--separator=%s" % sep,
222 "-olv_name,lv_size,lv_attr", vg_name])
224 logger.Error("Failed to list logical volumes, lvs output: %s" %
228 for line in result.stdout.splitlines():
229 line = line.strip().rstrip(sep)
230 name, size, attr = line.split(sep)
233 inactive = attr[4] == '-'
234 online = attr[5] == 'o'
235 lvs[name] = (size, inactive, online)
240 def ListVolumeGroups():
241 """List the volume groups and their size.
244 Dictionary with keys volume name and values the size of the volume
247 return utils.ListVolumeGroups()
251 """List all volumes on this node.
254 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
256 "--options=lv_name,lv_size,devices,vg_name"])
258 logger.Error("Failed to list logical volumes, lvs output: %s" %
264 return dev.split('(')[0]
270 'name': line[0].strip(),
271 'size': line[1].strip(),
272 'dev': parse_dev(line[2].strip()),
273 'vg': line[3].strip(),
276 return [map_line(line.split('|')) for line in result.stdout.splitlines()]
279 def BridgesExist(bridges_list):
280 """Check if a list of bridges exist on the current node.
283 True if all of them exist, false otherwise
286 for bridge in bridges_list:
287 if not utils.BridgeExists(bridge):
293 def GetInstanceList():
294 """Provides a list of instances.
297 A list of all running instances on the current node
298 - instance1.example.com
299 - instance2.example.com
303 names = hypervisor.GetHypervisor().ListInstances()
304 except errors.HypervisorError, err:
305 logger.Error("error enumerating instances: %s" % str(err))
311 def GetInstanceInfo(instance):
312 """Gives back the informations about an instance as a dictionary.
315 instance: name of the instance (ex. instance1.example.com)
318 { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
320 memory: memory size of instance (int)
321 state: xen state of instance (string)
322 time: cpu time of instance (float)
327 iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
328 if iinfo is not None:
329 output['memory'] = iinfo[2]
330 output['state'] = iinfo[4]
331 output['time'] = iinfo[5]
336 def GetAllInstancesInfo():
337 """Gather data about all instances.
339 This is the equivalent of `GetInstanceInfo()`, except that it
340 computes data for all instances at once, thus being faster if one
341 needs data about more than one instance.
343 Returns: a dictionary of dictionaries, keys being the instance name,
345 { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
347 memory: memory size of instance (int)
348 state: xen state of instance (string)
349 time: cpu time of instance (float)
350 vcpus: the number of cpus
355 iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
357 for name, inst_id, memory, vcpus, state, times in iinfo:
368 def AddOSToInstance(instance, os_disk, swap_disk):
369 """Add an OS to an instance.
372 instance: the instance object
373 os_disk: the instance-visible name of the os device
374 swap_disk: the instance-visible name of the swap device
377 inst_os = OSFromDisk(instance.os)
379 create_script = inst_os.create_script
381 os_device = instance.FindDisk(os_disk)
382 if os_device is None:
383 logger.Error("Can't find this device-visible name '%s'" % os_disk)
386 swap_device = instance.FindDisk(swap_disk)
387 if swap_device is None:
388 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
391 real_os_dev = _RecursiveFindBD(os_device)
392 if real_os_dev is None:
393 raise errors.BlockDeviceError("Block device '%s' is not set up" %
397 real_swap_dev = _RecursiveFindBD(swap_device)
398 if real_swap_dev is None:
399 raise errors.BlockDeviceError("Block device '%s' is not set up" %
403 logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
404 instance.name, int(time.time()))
405 if not os.path.exists(constants.LOG_OS_DIR):
406 os.mkdir(constants.LOG_OS_DIR, 0750)
408 command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
409 inst_os.path, create_script, instance.name,
410 real_os_dev.dev_path, real_swap_dev.dev_path,
413 result = utils.RunCmd(command)
415 logger.Error("os create command '%s' returned error: %s, logfile: %s,"
417 (command, result.fail_reason, logfile, result.output))
423 def RunRenameInstance(instance, old_name, os_disk, swap_disk):
424 """Run the OS rename script for an instance.
427 instance: the instance object
428 old_name: the old name of the instance
429 os_disk: the instance-visible name of the os device
430 swap_disk: the instance-visible name of the swap device
433 inst_os = OSFromDisk(instance.os)
435 script = inst_os.rename_script
437 os_device = instance.FindDisk(os_disk)
438 if os_device is None:
439 logger.Error("Can't find this device-visible name '%s'" % os_disk)
442 swap_device = instance.FindDisk(swap_disk)
443 if swap_device is None:
444 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
447 real_os_dev = _RecursiveFindBD(os_device)
448 if real_os_dev is None:
449 raise errors.BlockDeviceError("Block device '%s' is not set up" %
453 real_swap_dev = _RecursiveFindBD(swap_device)
454 if real_swap_dev is None:
455 raise errors.BlockDeviceError("Block device '%s' is not set up" %
459 logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
461 instance.name, int(time.time()))
462 if not os.path.exists(constants.LOG_OS_DIR):
463 os.mkdir(constants.LOG_OS_DIR, 0750)
465 command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
466 inst_os.path, script, old_name, instance.name,
467 real_os_dev.dev_path, real_swap_dev.dev_path,
470 result = utils.RunCmd(command)
473 logger.Error("os create command '%s' returned error: %s"
475 (command, result.fail_reason, result.output))
481 def _GetVGInfo(vg_name):
482 """Get informations about the volume group.
485 vg_name: the volume group
488 { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
490 vg_size is the total size of the volume group in MiB
491 vg_free is the free size of the volume group in MiB
492 pv_count are the number of physical disks in that vg
494 If an error occurs during gathering of data, we return the same dict
495 with keys all set to None.
498 retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
500 retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
501 "--nosuffix", "--units=m", "--separator=:", vg_name])
504 errmsg = "volume group %s not present" % vg_name
507 valarr = retval.stdout.strip().rstrip(':').split(':')
511 "vg_size": int(round(float(valarr[0]), 0)),
512 "vg_free": int(round(float(valarr[1]), 0)),
513 "pv_count": int(valarr[2]),
515 except ValueError, err:
516 logger.Error("Fail to parse vgs output: %s" % str(err))
518 logger.Error("vgs output has the wrong number of fields (expected"
519 " three): %s" % str(valarr))
523 def _GatherBlockDevs(instance):
524 """Set up an instance's block device(s).
526 This is run on the primary node at instance startup. The block
527 devices must be already assembled.
531 for disk in instance.disks:
532 device = _RecursiveFindBD(disk)
534 raise errors.BlockDeviceError("Block device '%s' is not set up." %
537 block_devices.append((disk, device))
541 def StartInstance(instance, extra_args):
542 """Start an instance.
545 instance - name of instance to start.
548 running_instances = GetInstanceList()
550 if instance.name in running_instances:
553 block_devices = _GatherBlockDevs(instance)
554 hyper = hypervisor.GetHypervisor()
557 hyper.StartInstance(instance, block_devices, extra_args)
558 except errors.HypervisorError, err:
559 logger.Error("Failed to start instance: %s" % err)
565 def ShutdownInstance(instance):
566 """Shut an instance down.
569 instance - name of instance to shutdown.
572 running_instances = GetInstanceList()
574 if instance.name not in running_instances:
577 hyper = hypervisor.GetHypervisor()
579 hyper.StopInstance(instance)
580 except errors.HypervisorError, err:
581 logger.Error("Failed to stop instance: %s" % err)
584 # test every 10secs for 2min
588 for dummy in range(11):
589 if instance.name not in GetInstanceList():
593 # the shutdown did not succeed
594 logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
597 hyper.StopInstance(instance, force=True)
598 except errors.HypervisorError, err:
599 logger.Error("Failed to stop instance: %s" % err)
603 if instance.name in GetInstanceList():
604 logger.Error("could not shutdown instance '%s' even by destroy")
610 def RebootInstance(instance, reboot_type, extra_args):
611 """Reboot an instance.
614 instance - name of instance to reboot
615 reboot_type - how to reboot [soft,hard,full]
618 running_instances = GetInstanceList()
620 if instance.name not in running_instances:
621 logger.Error("Cannot reboot instance that is not running")
624 hyper = hypervisor.GetHypervisor()
625 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
627 hyper.RebootInstance(instance)
628 except errors.HypervisorError, err:
629 logger.Error("Failed to soft reboot instance: %s" % err)
631 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
633 ShutdownInstance(instance)
634 StartInstance(instance, extra_args)
635 except errors.HypervisorError, err:
636 logger.Error("Failed to hard reboot instance: %s" % err)
639 raise errors.ParameterError("reboot_type invalid")
645 def CreateBlockDevice(disk, size, owner, on_primary, info):
646 """Creates a block device for an instance.
649 bdev: a ganeti.objects.Disk object
650 size: the size of the physical underlying devices
651 do_open: if the device should be `Assemble()`-d and
652 `Open()`-ed after creation
655 the new unique_id of the device (this can sometime be
656 computed only after creation), or None. On secondary nodes,
657 it's not required to return anything.
662 for child in disk.children:
663 crdev = _RecursiveAssembleBD(child, owner, on_primary)
664 if on_primary or disk.AssembleOnSecondary():
665 # we need the children open in case the device itself has to
670 device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
671 if device is not None:
672 logger.Info("removing existing device %s" % disk)
674 except errors.BlockDeviceError, err:
677 device = bdev.Create(disk.dev_type, disk.physical_id,
680 raise ValueError("Can't create child device for %s, %s" %
682 if on_primary or disk.AssembleOnSecondary():
683 if not device.Assemble():
684 errorstring = "Can't assemble device after creation"
685 logger.Error(errorstring)
686 raise errors.BlockDeviceError("%s, very unusual event - check the node"
687 " daemon logs" % errorstring)
688 device.SetSyncSpeed(constants.SYNC_SPEED)
689 if on_primary or disk.OpenOnSecondary():
690 device.Open(force=True)
691 DevCacheManager.UpdateCache(device.dev_path, owner,
692 on_primary, disk.iv_name)
696 physical_id = device.unique_id
700 def RemoveBlockDevice(disk):
701 """Remove a block device.
703 This is intended to be called recursively.
707 # since we are removing the device, allow a partial match
708 # this allows removal of broken mirrors
709 rdev = _RecursiveFindBD(disk, allow_partial=True)
710 except errors.BlockDeviceError, err:
711 # probably can't attach
712 logger.Info("Can't attach to device %s in remove" % disk)
715 r_path = rdev.dev_path
716 result = rdev.Remove()
718 DevCacheManager.RemoveCache(r_path)
722 for child in disk.children:
723 result = result and RemoveBlockDevice(child)
727 def _RecursiveAssembleBD(disk, owner, as_primary):
728 """Activate a block device for an instance.
730 This is run on the primary and secondary nodes for an instance.
732 This function is called recursively.
735 disk: a objects.Disk object
736 as_primary: if we should make the block device read/write
739 the assembled device or None (in case no device was assembled)
741 If the assembly is not successful, an exception is raised.
746 mcn = disk.ChildrenNeeded()
748 mcn = 0 # max number of Nones allowed
750 mcn = len(disk.children) - mcn # max number of Nones
751 for chld_disk in disk.children:
753 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
754 except errors.BlockDeviceError, err:
755 if children.count(None) >= mcn:
758 logger.Debug("Error in child activation: %s" % str(err))
759 children.append(cdev)
761 if as_primary or disk.AssembleOnSecondary():
762 r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
763 r_dev.SetSyncSpeed(constants.SYNC_SPEED)
765 if as_primary or disk.OpenOnSecondary():
767 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
768 as_primary, disk.iv_name)
775 def AssembleBlockDevice(disk, owner, as_primary):
776 """Activate a block device for an instance.
778 This is a wrapper over _RecursiveAssembleBD.
781 a /dev path for primary nodes
782 True for secondary nodes
785 result = _RecursiveAssembleBD(disk, owner, as_primary)
786 if isinstance(result, bdev.BlockDev):
787 result = result.dev_path
791 def ShutdownBlockDevice(disk):
792 """Shut down a block device.
794 First, if the device is assembled (can `Attach()`), then the device
795 is shutdown. Then the children of the device are shutdown.
797 This function is called recursively. Note that we don't cache the
798 children or such, as oppossed to assemble, shutdown of different
799 devices doesn't require that the upper device was active.
802 r_dev = _RecursiveFindBD(disk)
803 if r_dev is not None:
804 r_path = r_dev.dev_path
805 result = r_dev.Shutdown()
807 DevCacheManager.RemoveCache(r_path)
811 for child in disk.children:
812 result = result and ShutdownBlockDevice(child)
816 def MirrorAddChildren(parent_cdev, new_cdevs):
817 """Extend a mirrored block device.
820 parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
821 if parent_bdev is None:
822 logger.Error("Can't find parent device")
824 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
825 if new_bdevs.count(None) > 0:
826 logger.Error("Can't find new device(s) to add: %s:%s" %
827 (new_bdevs, new_cdevs))
829 parent_bdev.AddChildren(new_bdevs)
833 def MirrorRemoveChildren(parent_cdev, new_cdevs):
834 """Shrink a mirrored block device.
837 parent_bdev = _RecursiveFindBD(parent_cdev)
838 if parent_bdev is None:
839 logger.Error("Can't find parent in remove children: %s" % parent_cdev)
842 for disk in new_cdevs:
843 rpath = disk.StaticDevPath()
845 bd = _RecursiveFindBD(disk)
847 logger.Error("Can't find dynamic device %s while removing children" %
851 devs.append(bd.dev_path)
854 parent_bdev.RemoveChildren(devs)
858 def GetMirrorStatus(disks):
859 """Get the mirroring status of a list of devices.
862 disks: list of `objects.Disk`
865 list of (mirror_done, estimated_time) tuples, which
866 are the result of bdev.BlockDevice.CombinedSyncStatus()
871 rbd = _RecursiveFindBD(dsk)
873 raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
874 stats.append(rbd.CombinedSyncStatus())
878 def _RecursiveFindBD(disk, allow_partial=False):
879 """Check if a device is activated.
881 If so, return informations about the real device.
884 disk: the objects.Disk instance
885 allow_partial: don't abort the find if a child of the
886 device can't be found; this is intended to be
887 used when repairing mirrors
890 None if the device can't be found
891 otherwise the device instance
896 for chdisk in disk.children:
897 children.append(_RecursiveFindBD(chdisk))
899 return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
902 def FindBlockDevice(disk):
903 """Check if a device is activated.
905 If so, return informations about the real device.
908 disk: the objects.Disk instance
910 None if the device can't be found
911 (device_path, major, minor, sync_percent, estimated_time, is_degraded)
914 rbd = _RecursiveFindBD(disk)
917 return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
920 def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
921 """Write a file to the filesystem.
923 This allows the master to overwrite(!) a file. It will only perform
924 the operation if the file belongs to a list of configuration files.
927 if not os.path.isabs(file_name):
928 logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
933 constants.CLUSTER_CONF_FILE,
935 constants.SSH_KNOWN_HOSTS_FILE,
937 allowed_files.extend(ssconf.SimpleStore().GetFileList())
938 if file_name not in allowed_files:
939 logger.Error("Filename passed to UploadFile not in allowed"
940 " upload targets: '%s'" % file_name)
943 dir_name, small_name = os.path.split(file_name)
944 fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
945 # here we need to make sure we remove the temp file, if any error
948 os.chown(new_name, uid, gid)
949 os.chmod(new_name, mode)
952 os.utime(new_name, (atime, mtime))
953 os.rename(new_name, file_name)
956 utils.RemoveFile(new_name)
960 def _ErrnoOrStr(err):
961 """Format an EnvironmentError exception.
963 If the `err` argument has an errno attribute, it will be looked up
964 and converted into a textual EXXXX description. Otherwise the string
965 representation of the error will be returned.
968 if hasattr(err, 'errno'):
969 detail = errno.errorcode[err.errno]
975 def _OSSearch(name, search_path=None):
976 """Search for OSes with the given name in the search_path.
979 name: The name of the OS to look for
980 search_path: List of dirs to search (defaults to constants.OS_SEARCH_PATH)
983 The base_dir the OS resides in
986 if search_path is None:
987 search_path = constants.OS_SEARCH_PATH
989 for dir_name in search_path:
990 t_os_dir = os.path.sep.join([dir_name, name])
991 if os.path.isdir(t_os_dir):
997 def _OSOndiskVersion(name, os_dir):
998 """Compute and return the API version of a given OS.
1000 This function will try to read the API version of the os given by
1001 the 'name' parameter and residing in the 'os_dir' directory.
1003 Return value will be either an integer denoting the version or None in the
1004 case when this is not a valid OS name.
1007 api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1010 st = os.stat(api_file)
1011 except EnvironmentError, err:
1012 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1013 " found (%s)" % _ErrnoOrStr(err))
1015 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1016 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1022 api_version = f.read(256)
1025 except EnvironmentError, err:
1026 raise errors.InvalidOS(name, os_dir, "error while reading the"
1027 " API version (%s)" % _ErrnoOrStr(err))
1029 api_version = api_version.strip()
1031 api_version = int(api_version)
1032 except (TypeError, ValueError), err:
1033 raise errors.InvalidOS(name, os_dir,
1034 "API version is not integer (%s)" % str(err))
1039 def DiagnoseOS(top_dirs=None):
1040 """Compute the validity for all OSes.
1042 Returns an OS object for each name in all the given top directories
1043 (if not given defaults to constants.OS_SEARCH_PATH)
1049 if top_dirs is None:
1050 top_dirs = constants.OS_SEARCH_PATH
1053 for dir_name in top_dirs:
1054 if os.path.isdir(dir_name):
1056 f_names = utils.ListVisibleFiles(dir_name)
1057 except EnvironmentError, err:
1058 logger.Error("Can't list the OS directory %s: %s" %
1059 (dir_name, str(err)))
1061 for name in f_names:
1063 os_inst = OSFromDisk(name, base_dir=dir_name)
1064 result.append(os_inst)
1065 except errors.InvalidOS, err:
1066 result.append(objects.OS.FromInvalidOS(err))
1071 def OSFromDisk(name, base_dir=None):
1072 """Create an OS instance from disk.
1074 This function will return an OS instance if the given name is a
1075 valid OS name. Otherwise, it will raise an appropriate
1076 `errors.InvalidOS` exception, detailing why this is not a valid
1080 os_dir: Directory containing the OS scripts. Defaults to a search
1081 in all the OS_SEARCH_PATH directories.
1085 if base_dir is None:
1086 base_dir = _OSSearch(name)
1088 if base_dir is None:
1089 raise errors.InvalidOS(name, None, "OS dir not found in search path")
1091 os_dir = os.path.sep.join([base_dir, name])
1092 api_version = _OSOndiskVersion(name, os_dir)
1094 if api_version != constants.OS_API_VERSION:
1095 raise errors.InvalidOS(name, os_dir, "API version mismatch"
1096 " (found %s want %s)"
1097 % (api_version, constants.OS_API_VERSION))
1099 # OS Scripts dictionary, we will populate it with the actual script names
1100 os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1102 for script in os_scripts:
1103 os_scripts[script] = os.path.sep.join([os_dir, script])
1106 st = os.stat(os_scripts[script])
1107 except EnvironmentError, err:
1108 raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1109 (script, _ErrnoOrStr(err)))
1111 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1112 raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1115 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1116 raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1120 return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1121 create_script=os_scripts['create'],
1122 export_script=os_scripts['export'],
1123 import_script=os_scripts['import'],
1124 rename_script=os_scripts['rename'],
1125 api_version=api_version)
1128 def SnapshotBlockDevice(disk):
1129 """Create a snapshot copy of a block device.
1131 This function is called recursively, and the snapshot is actually created
1132 just for the leaf lvm backend device.
1135 disk: the disk to be snapshotted
1138 a config entry for the actual lvm device snapshotted.
1142 if len(disk.children) == 1:
1143 # only one child, let's recurse on it
1144 return SnapshotBlockDevice(disk.children[0])
1146 # more than one child, choose one that matches
1147 for child in disk.children:
1148 if child.size == disk.size:
1149 # return implies breaking the loop
1150 return SnapshotBlockDevice(child)
1151 elif disk.dev_type == constants.LD_LV:
1152 r_dev = _RecursiveFindBD(disk)
1153 if r_dev is not None:
1154 # let's stay on the safe side and ask for the full size, for now
1155 return r_dev.Snapshot(disk.size)
1159 raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
1160 " '%s' of type '%s'" %
1161 (disk.unique_id, disk.dev_type))
1164 def ExportSnapshot(disk, dest_node, instance):
1165 """Export a block device snapshot to a remote node.
1168 disk: the snapshot block device
1169 dest_node: the node to send the image to
1170 instance: instance being exported
1173 True if successful, False otherwise.
1176 inst_os = OSFromDisk(instance.os)
1177 export_script = inst_os.export_script
1179 logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1180 instance.name, int(time.time()))
1181 if not os.path.exists(constants.LOG_OS_DIR):
1182 os.mkdir(constants.LOG_OS_DIR, 0750)
1184 real_os_dev = _RecursiveFindBD(disk)
1185 if real_os_dev is None:
1186 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1190 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1191 destfile = disk.physical_id[1]
1193 # the target command is built out of three individual commands,
1194 # which are joined by pipes; we check each individual command for
1197 expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1198 export_script, instance.name,
1199 real_os_dev.dev_path, logfile)
1203 destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1204 destdir, destdir, destfile)
1205 remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)
1209 # all commands have been checked, so we're safe to combine them
1210 command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1212 result = utils.RunCmd(command)
1215 logger.Error("os snapshot export command '%s' returned error: %s"
1217 (command, result.fail_reason, result.output))
1223 def FinalizeExport(instance, snap_disks):
1224 """Write out the export configuration information.
1227 instance: instance configuration
1228 snap_disks: snapshot block devices
1231 False in case of error, True otherwise.
1234 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1235 finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
1237 config = objects.SerializableConfigParser()
1239 config.add_section(constants.INISECT_EXP)
1240 config.set(constants.INISECT_EXP, 'version', '0')
1241 config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
1242 config.set(constants.INISECT_EXP, 'source', instance.primary_node)
1243 config.set(constants.INISECT_EXP, 'os', instance.os)
1244 config.set(constants.INISECT_EXP, 'compression', 'gzip')
1246 config.add_section(constants.INISECT_INS)
1247 config.set(constants.INISECT_INS, 'name', instance.name)
1248 config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
1249 config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
1250 config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
1251 for nic_count, nic in enumerate(instance.nics):
1252 config.set(constants.INISECT_INS, 'nic%d_mac' %
1253 nic_count, '%s' % nic.mac)
1254 config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
1255 config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
1256 # TODO: redundant: on load can read nics until it doesn't exist
1257 config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)
1259 for disk_count, disk in enumerate(snap_disks):
1260 config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
1261 ('%s' % disk.iv_name))
1262 config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
1263 ('%s' % disk.physical_id[1]))
1264 config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
1266 config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)
1268 cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
1269 cfo = open(cff, 'w')
1275 shutil.rmtree(finaldestdir, True)
1276 shutil.move(destdir, finaldestdir)
1281 def ExportInfo(dest):
1282 """Get export configuration information.
1285 dest: directory containing the export
1288 A serializable config file containing the export info.
1291 cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
1293 config = objects.SerializableConfigParser()
1296 if (not config.has_section(constants.INISECT_EXP) or
1297 not config.has_section(constants.INISECT_INS)):
1303 def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1304 """Import an os image into an instance.
1307 instance: the instance object
1308 os_disk: the instance-visible name of the os device
1309 swap_disk: the instance-visible name of the swap device
1310 src_node: node holding the source image
1311 src_image: path to the source image on src_node
1314 False in case of error, True otherwise.
1317 inst_os = OSFromDisk(instance.os)
1318 import_script = inst_os.import_script
1320 os_device = instance.FindDisk(os_disk)
1321 if os_device is None:
1322 logger.Error("Can't find this device-visible name '%s'" % os_disk)
1325 swap_device = instance.FindDisk(swap_disk)
1326 if swap_device is None:
1327 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1330 real_os_dev = _RecursiveFindBD(os_device)
1331 if real_os_dev is None:
1332 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1336 real_swap_dev = _RecursiveFindBD(swap_device)
1337 if real_swap_dev is None:
1338 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1340 real_swap_dev.Open()
1342 logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1343 instance.name, int(time.time()))
1344 if not os.path.exists(constants.LOG_OS_DIR):
1345 os.mkdir(constants.LOG_OS_DIR, 0750)
1347 destcmd = utils.BuildShellCmd('cat %s', src_image)
1348 remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)
1351 impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1352 inst_os.path, import_script, instance.name,
1353 real_os_dev.dev_path, real_swap_dev.dev_path,
1356 command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1358 result = utils.RunCmd(command)
1361 logger.Error("os import command '%s' returned error: %s"
1363 (command, result.fail_reason, result.output))
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns:
    list of the visible entries under constants.EXPORT_DIR; an empty
    list when the export directory does not exist (instead of an
    implicit None, so callers can always iterate the result).

  """
  if os.path.isdir(constants.EXPORT_DIR):
    return utils.ListVisibleFiles(constants.EXPORT_DIR)
  else:
    return []
1379 def RemoveExport(export):
1380 """Remove an existing export from the node.
1383 export: the name of the export to remove
1386 False in case of error, True otherwise.
1389 target = os.path.join(constants.EXPORT_DIR, export)
1391 shutil.rmtree(target)
1392 # TODO: catch some of the relevant exceptions and provide a pretty
1393 # error message if rmtree fails.
1398 def RenameBlockDevices(devlist):
1399 """Rename a list of block devices.
1401 The devlist argument is a list of tuples (disk, new_logical,
1402 new_physical). The return value will be a combined boolean result
1403 (True only if all renames succeeded).
1407 for disk, unique_id in devlist:
1408 dev = _RecursiveFindBD(disk)
1413 old_rpath = dev.dev_path
1414 dev.Rename(unique_id)
1415 new_rpath = dev.dev_path
1416 if old_rpath != new_rpath:
1417 DevCacheManager.RemoveCache(old_rpath)
1418 # FIXME: we should add the new cache information here, like:
1419 # DevCacheManager.UpdateCache(new_rpath, owner, ...)
1420 # but we don't have the owner here - maybe parse from existing
1421 # cache? for now, we only lose lvm data when we rename, which
1422 # is less critical than DRBD or MD
1423 except errors.BlockDeviceError, err:
1424 logger.Error("Can't rename device '%s' to '%s': %s" %
1425 (dev, unique_id, err))
1430 class HooksRunner(object):
1433 This class is instantiated on the node side (ganeti-noded) and not on
1437 RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1439 def __init__(self, hooks_base_dir=None):
1440 """Constructor for hooks runner.
1443 - hooks_base_dir: if not None, this overrides the
1444 constants.HOOKS_BASE_DIR (useful for unittests)
1445 - logs_base_dir: if not None, this overrides the
1446 constants.LOG_HOOKS_DIR (useful for unittests)
1447 - logging: enable or disable logging of script output
1450 if hooks_base_dir is None:
1451 hooks_base_dir = constants.HOOKS_BASE_DIR
1452 self._BASE_DIR = hooks_base_dir
1455 def ExecHook(script, env):
1456 """Exec one hook script.
1460 - script: the full path to the script
1461 - env: the environment with which to exec the script
1464 # exec the process using subprocess and log the output
1467 fdstdin = open("/dev/null", "r")
1468 child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1469 stderr=subprocess.STDOUT, close_fds=True,
1470 shell=False, cwd="/", env=env)
1473 output = child.stdout.read(4096)
1474 child.stdout.close()
1475 except EnvironmentError, err:
1476 output += "Hook script error: %s" % str(err)
1480 result = child.wait()
1482 except EnvironmentError, err:
1483 if err.errno == errno.EINTR:
1487 # try not to leak fds
1488 for fd in (fdstdin, ):
1492 except EnvironmentError, err:
1493 # just log the error
1494 #logger.Error("While closing fd %s: %s" % (fd, err))
1497 return result == 0, output
1499 def RunHooks(self, hpath, phase, env):
1500 """Run the scripts in the hooks directory.
1502 This method will not be usually overriden by child opcodes.
1505 if phase == constants.HOOKS_PHASE_PRE:
1507 elif phase == constants.HOOKS_PHASE_POST:
1510 raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
1513 subdir = "%s-%s.d" % (hpath, suffix)
1514 dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1516 dir_contents = utils.ListVisibleFiles(dir_name)
1517 except OSError, err:
1521 # we use the standard python sort order,
1522 # so 00name is the recommended naming scheme
1524 for relname in dir_contents:
1525 fname = os.path.join(dir_name, relname)
1526 if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1527 self.RE_MASK.match(relname) is not None):
1528 rrval = constants.HKR_SKIP
1531 result, output = self.ExecHook(fname, env)
1533 rrval = constants.HKR_FAIL
1535 rrval = constants.HKR_SUCCESS
1536 rr.append(("%s/%s" % (subdir, relname), rrval, output))
1541 class DevCacheManager(object):
1542 """Simple class for managing a chache of block device information.
1545 _DEV_PREFIX = "/dev/"
1546 _ROOT_DIR = constants.BDEV_CACHE_DIR
1549 def _ConvertPath(cls, dev_path):
1550 """Converts a /dev/name path to the cache file name.
1552 This replaces slashes with underscores and strips the /dev
1553 prefix. It then returns the full path to the cache file
1556 if dev_path.startswith(cls._DEV_PREFIX):
1557 dev_path = dev_path[len(cls._DEV_PREFIX):]
1558 dev_path = dev_path.replace("/", "_")
1559 fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1563 def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1564 """Updates the cache information for a given device.
1567 if dev_path is None:
1568 logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1570 fpath = cls._ConvertPath(dev_path)
1576 iv_name = "not_visible"
1577 fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1579 utils.WriteFile(fpath, data=fdata)
1580 except EnvironmentError, err:
1581 logger.Error("Can't update bdev cache for %s, error %s" %
1582 (dev_path, str(err)))
1585 def RemoveCache(cls, dev_path):
1586 """Remove data for a dev_path.
1589 if dev_path is None:
1590 logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1592 fpath = cls._ConvertPath(dev_path)
1594 utils.RemoveFile(fpath)
1595 except EnvironmentError, err:
1596 logger.Error("Can't update bdev cache for %s, error %s" %
1597 (dev_path, str(err)))