4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
35 from ganeti import logger
36 from ganeti import errors
37 from ganeti import utils
38 from ganeti import ssh
39 from ganeti import hypervisor
40 from ganeti import constants
41 from ganeti import bdev
42 from ganeti import objects
43 from ganeti import ssconf
def _GetSshRunner():
  """Return a fresh SshRunner for executing commands on remote nodes."""
  return ssh.SshRunner()
def StartMaster():
  """Activate local node as master node.

  There are two needed steps for this:
    - run the master script
    - register the cron script

  Returns:
    True on success, False if the master script failed.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])

  if result.failed:
    logger.Error("could not activate cluster interface with command %s,"
                 " error: '%s'" % (result.cmd, result.output))
    return False

  return True
def StopMaster():
  """Deactivate this node as master.

  This runs the master stop script.

  Returns:
    True on success, False if the master script failed.

  """
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])

  if result.failed:
    logger.Error("could not deactivate cluster interface with command %s,"
                 " error: '%s'" % (result.cmd, result.output))
    return False

  return True
84 def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
85 """Joins this node to the cluster.
87 This does the following:
88 - updates the hostkeys of the machine (rsa and dsa)
89 - adds the ssh private key to the user
90 - adds the ssh public key to the users' authorized_keys file
93 sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
94 (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
95 (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
96 (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
97 for name, content, mode in sshd_keys:
98 utils.WriteFile(name, data=content, mode=mode)
101 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
103 except errors.OpExecError, err:
104 logger.Error("Error while processing user ssh files: %s" % err)
107 for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
108 utils.WriteFile(name, data=content, mode=0600)
110 utils.AddAuthorizedKey(auth_keys, sshpub)
112 utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
118 """Cleans up the current node and prepares it to be removed from the cluster.
121 if os.path.isdir(constants.DATA_DIR):
122 for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
123 full_name = os.path.join(constants.DATA_DIR, rel_name)
124 if os.path.isfile(full_name) and not os.path.islink(full_name):
125 utils.RemoveFile(full_name)
128 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
129 except errors.OpExecError, err:
130 logger.Error("Error while processing ssh files: %s" % err)
133 f = open(pub_key, 'r')
135 utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
139 utils.RemoveFile(priv_key)
140 utils.RemoveFile(pub_key)
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.

  Returns:
    { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx,
      'memory_free' : xxx, 'memory_total' : xxx }
    where
    vg_size is the size of the configured volume group in MiB
    vg_free is the free size of the volume group in MiB
    memory_dom0 is the memory allocated for domain0 in MiB
    memory_free is the currently available (free) ram in MiB
    memory_total is the total number of ram in MiB

  """
  outputarray = {}
  vginfo = _GetVGInfo(vgname)
  outputarray['vg_size'] = vginfo['vg_size']
  outputarray['vg_free'] = vginfo['vg_free']

  hyper = hypervisor.GetHypervisor()
  hyp_info = hyper.GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)

  # boot_id changes on every reboot, allowing detection of node restarts;
  # close the handle deterministically instead of leaking it
  f = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    outputarray["bootid"] = f.read(128).rstrip("\n")
  finally:
    f.close()

  return outputarray
def VerifyNode(what):
  """Verify the status of the local node.

  Args:
    what - a dictionary of things to check:
      'filelist' : list of files for which to compute checksums
      'nodelist' : list of nodes we should check communication with
      'hypervisor': run the hypervisor-specific verify
      'node-net-test': list of (name, primary_ip, secondary_ip) tuples
                       to TCP-ping on the node daemon port

  Requested files on local node are checksummed and the result returned.

  The nodelist is traversed, with the following checks being made
  for each node:
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.

  """
  result = {}

  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()

  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])

  if 'nodelist' in what:
    result['nodelist'] = {}
    # shuffle so that concurrent verifications don't all hit the
    # same nodes in the same order
    random.shuffle(what['nodelist'])
    for node in what['nodelist']:
      success, message = _GetSshRunner().VerifyNodeHostname(node)
      if not success:
        result['nodelist'][node] = message

  if 'node-net-test' in what:
    result['node-net-test'] = {}
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
    # find our own entry to learn which source IPs to ping from
    for name, pip, sip in what['node-net-test']:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      result['node-net-test'][my_name] = ("Can't find my own"
                                          " primary/secondary IP"
                                          " in the node list")
    else:
      port = ssconf.SimpleStore().GetNodeDaemonPort()
      for name, pip, sip in what['node-net-test']:
        fail = []
        if not utils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          if not utils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          result['node-net-test'][name] = ("failure using the %s"
                                           " interface(s)" %
                                           " and ".join(fail))

  return result
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  Returns:
    dictionary of all partions (key) with their size (in MiB), inactive
    and online status:
    {'test1': ('20.06', True, True)}

  On lvs failure, the raw command output is returned instead (callers
  treat a non-dict result as an error message).

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return result.output

  for line in result.stdout.splitlines():
    line = line.strip().rstrip(sep)
    name, size, attr = line.split(sep)
    # lv_attr positions (see lvs(8)): [4] is state ('a'ctive/'-'),
    # [5] is device open ('o'pen/'-')
    if len(attr) != 6:
      attr = '------'
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
def ListVolumeGroups():
  """List the volume groups and their size.

  Returns:
    Dictionary with keys volume name and values the size of the volume

  """
  return utils.ListVolumeGroups()
def NodeVolumes():
  """List all volumes on this node.

  Returns a list of dicts, one per logical volume, with keys
  'name', 'size', 'dev' (backing physical device) and 'vg'.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logger.Error("Failed to list logical volumes, lvs output: %s" %
                 result.output)
    return []

  def parse_dev(dev):
    # 'devices' looks like '/dev/sda1(5)'; strip the extent offset
    if '(' in dev:
      return dev.split('(')[0]
    else:
      return dev

  def map_line(line):
    return {
      'name': line[0].strip(),
      'size': line[1].strip(),
      'dev': parse_dev(line[2].strip()),
      'vg': line[3].strip(),
    }

  return [map_line(line.split('|')) for line in result.stdout.splitlines()]
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  Returns:
    True if all of them exist, false otherwise

  """
  for bridge in bridges_list:
    if not utils.BridgeExists(bridge):
      return False

  return True
323 def GetInstanceList():
324 """Provides a list of instances.
327 A list of all running instances on the current node
328 - instance1.example.com
329 - instance2.example.com
333 names = hypervisor.GetHypervisor().ListInstances()
334 except errors.HypervisorError, err:
335 logger.Error("error enumerating instances: %s" % str(err))
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.

  Args:
    instance: name of the instance (ex. instance1.example.com)

  Returns:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)

    Empty dict if the hypervisor does not know the instance.

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is not None:
    # hypervisor tuple layout: indexes 2/4/5 are memory/state/cpu-time
    output['memory'] = iinfo[2]
    output['state'] = iinfo[4]
    output['time'] = iinfo[5]

  return output
def GetAllInstancesInfo():
  """Gather data about all instances.

  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  Returns: a dictionary of dictionaries, keys being the instance name,
    and with values:
    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
    where
    memory: memory size of instance (int)
    state: xen state of instance (string)
    time: cpu time of instance (float)
    vcpus: the number of cpus

  """
  output = {}

  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  if iinfo:
    for name, inst_id, memory, vcpus, state, times in iinfo:
      output[name] = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }

  return output
398 def AddOSToInstance(instance, os_disk, swap_disk):
399 """Add an OS to an instance.
402 instance: the instance object
403 os_disk: the instance-visible name of the os device
404 swap_disk: the instance-visible name of the swap device
407 inst_os = OSFromDisk(instance.os)
409 create_script = inst_os.create_script
411 os_device = instance.FindDisk(os_disk)
412 if os_device is None:
413 logger.Error("Can't find this device-visible name '%s'" % os_disk)
416 swap_device = instance.FindDisk(swap_disk)
417 if swap_device is None:
418 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
421 real_os_dev = _RecursiveFindBD(os_device)
422 if real_os_dev is None:
423 raise errors.BlockDeviceError("Block device '%s' is not set up" %
427 real_swap_dev = _RecursiveFindBD(swap_device)
428 if real_swap_dev is None:
429 raise errors.BlockDeviceError("Block device '%s' is not set up" %
433 logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
434 instance.name, int(time.time()))
435 if not os.path.exists(constants.LOG_OS_DIR):
436 os.mkdir(constants.LOG_OS_DIR, 0750)
438 command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
439 inst_os.path, create_script, instance.name,
440 real_os_dev.dev_path, real_swap_dev.dev_path,
443 result = utils.RunCmd(command)
445 logger.Error("os create command '%s' returned error: %s, logfile: %s,"
447 (command, result.fail_reason, logfile, result.output))
453 def RunRenameInstance(instance, old_name, os_disk, swap_disk):
454 """Run the OS rename script for an instance.
457 instance: the instance object
458 old_name: the old name of the instance
459 os_disk: the instance-visible name of the os device
460 swap_disk: the instance-visible name of the swap device
463 inst_os = OSFromDisk(instance.os)
465 script = inst_os.rename_script
467 os_device = instance.FindDisk(os_disk)
468 if os_device is None:
469 logger.Error("Can't find this device-visible name '%s'" % os_disk)
472 swap_device = instance.FindDisk(swap_disk)
473 if swap_device is None:
474 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
477 real_os_dev = _RecursiveFindBD(os_device)
478 if real_os_dev is None:
479 raise errors.BlockDeviceError("Block device '%s' is not set up" %
483 real_swap_dev = _RecursiveFindBD(swap_device)
484 if real_swap_dev is None:
485 raise errors.BlockDeviceError("Block device '%s' is not set up" %
489 logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
491 instance.name, int(time.time()))
492 if not os.path.exists(constants.LOG_OS_DIR):
493 os.mkdir(constants.LOG_OS_DIR, 0750)
495 command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
496 inst_os.path, script, old_name, instance.name,
497 real_os_dev.dev_path, real_swap_dev.dev_path,
500 result = utils.RunCmd(command)
503 logger.Error("os create command '%s' returned error: %s"
505 (command, result.fail_reason, result.output))
511 def _GetVGInfo(vg_name):
512 """Get informations about the volume group.
515 vg_name: the volume group
518 { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
520 vg_size is the total size of the volume group in MiB
521 vg_free is the free size of the volume group in MiB
522 pv_count are the number of physical disks in that vg
524 If an error occurs during gathering of data, we return the same dict
525 with keys all set to None.
528 retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
530 retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
531 "--nosuffix", "--units=m", "--separator=:", vg_name])
534 errmsg = "volume group %s not present" % vg_name
537 valarr = retval.stdout.strip().rstrip(':').split(':')
541 "vg_size": int(round(float(valarr[0]), 0)),
542 "vg_free": int(round(float(valarr[1]), 0)),
543 "pv_count": int(valarr[2]),
545 except ValueError, err:
546 logger.Error("Fail to parse vgs output: %s" % str(err))
548 logger.Error("vgs output has the wrong number of fields (expected"
549 " three): %s" % str(valarr))
def _GatherBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  Returns:
    list of (disk_object, device_instance) pairs, one per instance disk.

  Raises:
    errors.BlockDeviceError if any disk cannot be found.

  """
  block_devices = []
  for disk in instance.disks:
    device = _RecursiveFindBD(disk)
    if device is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    device.Open()
    block_devices.append((disk, device))
  return block_devices
571 def StartInstance(instance, extra_args):
572 """Start an instance.
575 instance - name of instance to start.
578 running_instances = GetInstanceList()
580 if instance.name in running_instances:
583 block_devices = _GatherBlockDevs(instance)
584 hyper = hypervisor.GetHypervisor()
587 hyper.StartInstance(instance, block_devices, extra_args)
588 except errors.HypervisorError, err:
589 logger.Error("Failed to start instance: %s" % err)
595 def ShutdownInstance(instance):
596 """Shut an instance down.
599 instance - name of instance to shutdown.
602 running_instances = GetInstanceList()
604 if instance.name not in running_instances:
607 hyper = hypervisor.GetHypervisor()
609 hyper.StopInstance(instance)
610 except errors.HypervisorError, err:
611 logger.Error("Failed to stop instance: %s" % err)
614 # test every 10secs for 2min
618 for dummy in range(11):
619 if instance.name not in GetInstanceList():
623 # the shutdown did not succeed
624 logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
627 hyper.StopInstance(instance, force=True)
628 except errors.HypervisorError, err:
629 logger.Error("Failed to stop instance: %s" % err)
633 if instance.name in GetInstanceList():
634 logger.Error("could not shutdown instance '%s' even by destroy")
640 def RebootInstance(instance, reboot_type, extra_args):
641 """Reboot an instance.
644 instance - name of instance to reboot
645 reboot_type - how to reboot [soft,hard,full]
648 running_instances = GetInstanceList()
650 if instance.name not in running_instances:
651 logger.Error("Cannot reboot instance that is not running")
654 hyper = hypervisor.GetHypervisor()
655 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
657 hyper.RebootInstance(instance)
658 except errors.HypervisorError, err:
659 logger.Error("Failed to soft reboot instance: %s" % err)
661 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
663 ShutdownInstance(instance)
664 StartInstance(instance, extra_args)
665 except errors.HypervisorError, err:
666 logger.Error("Failed to hard reboot instance: %s" % err)
669 raise errors.ParameterError("reboot_type invalid")
675 def CreateBlockDevice(disk, size, owner, on_primary, info):
676 """Creates a block device for an instance.
679 disk: a ganeti.objects.Disk object
680 size: the size of the physical underlying device
681 owner: a string with the name of the instance
682 on_primary: a boolean indicating if it is the primary node or not
683 info: string that will be sent to the physical device creation
686 the new unique_id of the device (this can sometime be
687 computed only after creation), or None. On secondary nodes,
688 it's not required to return anything.
693 for child in disk.children:
694 crdev = _RecursiveAssembleBD(child, owner, on_primary)
695 if on_primary or disk.AssembleOnSecondary():
696 # we need the children open in case the device itself has to
701 device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
702 if device is not None:
703 logger.Info("removing existing device %s" % disk)
705 except errors.BlockDeviceError, err:
708 device = bdev.Create(disk.dev_type, disk.physical_id,
711 raise ValueError("Can't create child device for %s, %s" %
713 if on_primary or disk.AssembleOnSecondary():
714 if not device.Assemble():
715 errorstring = "Can't assemble device after creation"
716 logger.Error(errorstring)
717 raise errors.BlockDeviceError("%s, very unusual event - check the node"
718 " daemon logs" % errorstring)
719 device.SetSyncSpeed(constants.SYNC_SPEED)
720 if on_primary or disk.OpenOnSecondary():
721 device.Open(force=True)
722 DevCacheManager.UpdateCache(device.dev_path, owner,
723 on_primary, disk.iv_name)
727 physical_id = device.unique_id
731 def RemoveBlockDevice(disk):
732 """Remove a block device.
734 This is intended to be called recursively.
738 # since we are removing the device, allow a partial match
739 # this allows removal of broken mirrors
740 rdev = _RecursiveFindBD(disk, allow_partial=True)
741 except errors.BlockDeviceError, err:
742 # probably can't attach
743 logger.Info("Can't attach to device %s in remove" % disk)
746 r_path = rdev.dev_path
747 result = rdev.Remove()
749 DevCacheManager.RemoveCache(r_path)
753 for child in disk.children:
754 result = result and RemoveBlockDevice(child)
758 def _RecursiveAssembleBD(disk, owner, as_primary):
759 """Activate a block device for an instance.
761 This is run on the primary and secondary nodes for an instance.
763 This function is called recursively.
766 disk: a objects.Disk object
767 as_primary: if we should make the block device read/write
770 the assembled device or None (in case no device was assembled)
772 If the assembly is not successful, an exception is raised.
777 mcn = disk.ChildrenNeeded()
779 mcn = 0 # max number of Nones allowed
781 mcn = len(disk.children) - mcn # max number of Nones
782 for chld_disk in disk.children:
784 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
785 except errors.BlockDeviceError, err:
786 if children.count(None) >= mcn:
789 logger.Debug("Error in child activation: %s" % str(err))
790 children.append(cdev)
792 if as_primary or disk.AssembleOnSecondary():
793 r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
794 r_dev.SetSyncSpeed(constants.SYNC_SPEED)
796 if as_primary or disk.OpenOnSecondary():
798 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
799 as_primary, disk.iv_name)
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  Returns:
    a /dev path for primary nodes
    True for secondary nodes

  """
  result = _RecursiveAssembleBD(disk, owner, as_primary)
  if isinstance(result, bdev.BlockDev):
    result = result.dev_path
  return result
def ShutdownBlockDevice(disk):
  """Shut down a block device.

  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  Returns:
    True if the device and its children were shut down, False otherwise.

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
    if result:
      DevCacheManager.RemoveCache(r_path)
  else:
    result = True
  if disk.children:
    for child in disk.children:
      result = result and ShutdownBlockDevice(child)
  return result
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  Args:
    parent_cdev: the disk whose mirror is being extended
    new_cdevs: list of disks to attach as new mirror children

  Returns:
    True on success, False if the parent or any new device cannot
    be found.

  """
  # allow a partial match: the parent may be a degraded mirror
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
    return False
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if new_bdevs.count(None) > 0:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
    return False
  parent_bdev.AddChildren(new_bdevs)
  return True
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  Args:
    parent_cdev: the disk whose mirror is being shrunk
    new_cdevs: list of disks to detach from the mirror

  Returns:
    True on success, False if the parent or a dynamic child device
    cannot be found.

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
    return False
  devs = []
  for disk in new_cdevs:
    # prefer the static path; fall back to locating the live device
    rpath = disk.StaticDevPath()
    if rpath is None:
      bd = _RecursiveFindBD(disk)
      if bd is None:
        logger.Error("Can't find dynamic device %s while removing children" %
                     disk)
        return False
      else:
        devs.append(bd.dev_path)
    else:
      devs.append(rpath)
  parent_bdev.RemoveChildren(devs)
  return True
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.

  Args:
    disks: list of `objects.Disk`

  Returns:
    list of (mirror_done, estimated_time) tuples, which
    are the result of bdev.BlockDevice.CombinedSyncStatus()

  Raises:
    errors.BlockDeviceError if any device cannot be found.

  """
  stats = []
  for dsk in disks:
    rbd = _RecursiveFindBD(dsk)
    if rbd is None:
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
    stats.append(rbd.CombinedSyncStatus())
  return stats
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
    allow_partial: don't abort the find if a child of the
                   device can't be found; this is intended to be
                   used when repairing mirrors

  Returns:
    None if the device can't be found
    otherwise the device instance

  """
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
def FindBlockDevice(disk):
  """Check if a device is activated.

  If so, return informations about the real device.

  Args:
    disk: the objects.Disk instance
  Returns:
    None if the device can't be found
    (device_path, major, minor, sync_percent, estimated_time, is_degraded)

  """
  rbd = _RecursiveFindBD(disk)
  if rbd is None:
    return rbd
  return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  Returns:
    True on success, False if the path is not absolute or not in the
    whitelist of upload targets.

  """
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
                 file_name)
    return False

  # whitelist of master-writable files; anything else is rejected.
  # NOTE(review): reconstructed from an elided listing -- one whitelist
  # entry between CLUSTER_CONF_FILE and SSH_KNOWN_HOSTS_FILE may be
  # missing; confirm against upstream
  allowed_files = [
    constants.CLUSTER_CONF_FILE,
    constants.SSH_KNOWN_HOSTS_FILE,
    ]
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
    return False

  utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
                  atime=atime, mtime=mtime)
  return True
979 def _ErrnoOrStr(err):
980 """Format an EnvironmentError exception.
982 If the `err` argument has an errno attribute, it will be looked up
983 and converted into a textual EXXXX description. Otherwise the string
984 representation of the error will be returned.
987 if hasattr(err, 'errno'):
988 detail = errno.errorcode[err.errno]
994 def _OSOndiskVersion(name, os_dir):
995 """Compute and return the API version of a given OS.
997 This function will try to read the API version of the os given by
998 the 'name' parameter and residing in the 'os_dir' directory.
1000 Return value will be either an integer denoting the version or None in the
1001 case when this is not a valid OS name.
1004 api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
1007 st = os.stat(api_file)
1008 except EnvironmentError, err:
1009 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1010 " found (%s)" % _ErrnoOrStr(err))
1012 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1013 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
1019 api_version = f.read(256)
1022 except EnvironmentError, err:
1023 raise errors.InvalidOS(name, os_dir, "error while reading the"
1024 " API version (%s)" % _ErrnoOrStr(err))
1026 api_version = api_version.strip()
1028 api_version = int(api_version)
1029 except (TypeError, ValueError), err:
1030 raise errors.InvalidOS(name, os_dir,
1031 "API version is not integer (%s)" % str(err))
1036 def DiagnoseOS(top_dirs=None):
1037 """Compute the validity for all OSes.
1039 Returns an OS object for each name in all the given top directories
1040 (if not given defaults to constants.OS_SEARCH_PATH)
1046 if top_dirs is None:
1047 top_dirs = constants.OS_SEARCH_PATH
1050 for dir_name in top_dirs:
1051 if os.path.isdir(dir_name):
1053 f_names = utils.ListVisibleFiles(dir_name)
1054 except EnvironmentError, err:
1055 logger.Error("Can't list the OS directory %s: %s" %
1056 (dir_name, str(err)))
1058 for name in f_names:
1060 os_inst = OSFromDisk(name, base_dir=dir_name)
1061 result.append(os_inst)
1062 except errors.InvalidOS, err:
1063 result.append(objects.OS.FromInvalidOS(err))
1068 def OSFromDisk(name, base_dir=None):
1069 """Create an OS instance from disk.
1071 This function will return an OS instance if the given name is a
1072 valid OS name. Otherwise, it will raise an appropriate
1073 `errors.InvalidOS` exception, detailing why this is not a valid
1077 os_dir: Directory containing the OS scripts. Defaults to a search
1078 in all the OS_SEARCH_PATH directories.
1082 if base_dir is None:
1083 os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
1085 raise errors.InvalidOS(name, None, "OS dir not found in search path")
1087 os_dir = os.path.sep.join([base_dir, name])
1089 api_version = _OSOndiskVersion(name, os_dir)
1091 if api_version != constants.OS_API_VERSION:
1092 raise errors.InvalidOS(name, os_dir, "API version mismatch"
1093 " (found %s want %s)"
1094 % (api_version, constants.OS_API_VERSION))
1096 # OS Scripts dictionary, we will populate it with the actual script names
1097 os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
1099 for script in os_scripts:
1100 os_scripts[script] = os.path.sep.join([os_dir, script])
1103 st = os.stat(os_scripts[script])
1104 except EnvironmentError, err:
1105 raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1106 (script, _ErrnoOrStr(err)))
1108 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1109 raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1112 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1113 raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1117 return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1118 create_script=os_scripts['create'],
1119 export_script=os_scripts['export'],
1120 import_script=os_scripts['import'],
1121 rename_script=os_scripts['rename'],
1122 api_version=api_version)
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  Args:
    disk: the disk to be snapshotted

  Returns:
    a config entry for the actual lvm device snapshotted, or None if
    the leaf device cannot be found.

  Raises:
    errors.ProgrammerError for a childless non-LVM device.

  """
  if disk.children:
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    else:
      # more than one child, choose one that matches
      for child in disk.children:
        if child.size == disk.size:
          # return implies breaking the loop
          return SnapshotBlockDevice(child)
  elif disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      return None
  else:
    raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                                 " '%s' of type '%s'" %
                                 (disk.unique_id, disk.dev_type))
1161 def ExportSnapshot(disk, dest_node, instance):
1162 """Export a block device snapshot to a remote node.
1165 disk: the snapshot block device
1166 dest_node: the node to send the image to
1167 instance: instance being exported
1170 True if successful, False otherwise.
1173 inst_os = OSFromDisk(instance.os)
1174 export_script = inst_os.export_script
1176 logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1177 instance.name, int(time.time()))
1178 if not os.path.exists(constants.LOG_OS_DIR):
1179 os.mkdir(constants.LOG_OS_DIR, 0750)
1181 real_os_dev = _RecursiveFindBD(disk)
1182 if real_os_dev is None:
1183 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1187 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1188 destfile = disk.physical_id[1]
1190 # the target command is built out of three individual commands,
1191 # which are joined by pipes; we check each individual command for
1194 expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1195 export_script, instance.name,
1196 real_os_dev.dev_path, logfile)
1200 destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1201 destdir, destdir, destfile)
1202 remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,
1205 # all commands have been checked, so we're safe to combine them
1206 command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1208 result = utils.RunCmd(command)
1211 logger.Error("os snapshot export command '%s' returned error: %s"
1213 (command, result.fail_reason, result.output))
def FinalizeExport(instance, snap_disks):
  """Write out the export configuration information.

  Args:
    instance: instance configuration
    snap_disks: snapshot block devices

  Returns:
    False in case of error, True otherwise.

  """
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)

  config = objects.SerializableConfigParser()

  config.add_section(constants.INISECT_EXP)
  config.set(constants.INISECT_EXP, 'version', '0')
  config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
  config.set(constants.INISECT_EXP, 'source', instance.primary_node)
  config.set(constants.INISECT_EXP, 'os', instance.os)
  config.set(constants.INISECT_EXP, 'compression', 'gzip')

  config.add_section(constants.INISECT_INS)
  config.set(constants.INISECT_INS, 'name', instance.name)
  config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
  config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
  config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)

  nic_count = 0
  for nic_count, nic in enumerate(instance.nics):
    config.set(constants.INISECT_INS, 'nic%d_mac' %
               nic_count, '%s' % nic.mac)
    config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count,
               '%s' % nic.bridge)
  # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE(review): 'nic_count' stores the *last index*, not the length --
  # looks off-by-one, but the importer presumably compensates; confirm
  # against the import side before changing
  config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)

  disk_count = 0
  for disk_count, disk in enumerate(snap_disks):
    config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
               ('%s' % disk.iv_name))
    config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
               ('%s' % disk.physical_id[1]))
    config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
               ('%d' % disk.size))
  config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)

  cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
  cfo = open(cff, 'w')
  try:
    config.write(cfo)
  finally:
    cfo.close()

  # atomically (as far as possible) replace any previous export of
  # this instance with the new one
  shutil.rmtree(finaldestdir, True)
  shutil.move(destdir, finaldestdir)

  return True
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    the mandatory sections are missing.

  """
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  config.read(cff)

  if (not config.has_section(constants.INISECT_EXP) or
      not config.has_section(constants.INISECT_INS)):
    return None

  return config
def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
  """Import an os image into an instance.

  Args:
    instance: the instance object
    os_disk: the instance-visible name of the os device
    swap_disk: the instance-visible name of the swap device
    src_node: node holding the source image
    src_image: path to the source image on src_node

  Returns:
    False in case of error, True otherwise.

  """
  # the actual import work is delegated to the OS definition's script
  inst_os = OSFromDisk(instance.os)
  import_script = inst_os.import_script

  # translate the instance-visible disk names into the disk objects
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)

  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)

  # resolve the disk objects to the block devices backing them; the
  # devices must already be assembled on this node
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %

  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  real_swap_dev.Open()

  # per-import log file, named after the OS, instance and timestamp
  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
                                        instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)

  # build a shell pipeline: cat the image on the source node over ssh,
  # then feed it into the OS import script on this node
  destcmd = utils.BuildShellCmd('cat %s', src_image)
  remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,

  impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
                               inst_os.path, import_script, instance.name,
                               real_os_dev.dev_path, real_swap_dev.dev_path,

  command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])

  result = utils.RunCmd(command)
    logger.Error("os import command '%s' returned error: %s"
                 (command, result.fail_reason, result.output))
1370 """Return a list of exports currently available on this machine.
1373 if os.path.isdir(constants.EXPORT_DIR):
1374 return utils.ListVisibleFiles(constants.EXPORT_DIR)
def RemoveExport(export):
  """Remove an existing export from the node.

  Args:
    export: the name of the export to remove

  Returns:
    False in case of error, True otherwise.

  """
  # exports are stored as subdirectories of EXPORT_DIR; removing the
  # directory tree removes the export
  target = os.path.join(constants.EXPORT_DIR, export)

  shutil.rmtree(target)
  # TODO: catch some of the relevant exceptions and provide a pretty
  # error message if rmtree fails.
def RenameBlockDevices(devlist):
  """Rename a list of block devices.

  The devlist argument is a list of tuples (disk, new_logical,
  new_physical). The return value will be a combined boolean result
  (True only if all renames succeeded).

  """
  for disk, unique_id in devlist:
    dev = _RecursiveFindBD(disk)
      # remember the old device path so the bdev cache entry can be
      # invalidated if the rename changes it
      old_rpath = dev.dev_path
      dev.Rename(unique_id)
      new_rpath = dev.dev_path
      if old_rpath != new_rpath:
        DevCacheManager.RemoveCache(old_rpath)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError, err:
      # log but keep going, so remaining devices still get renamed
      logger.Error("Can't rename device '%s' to '%s': %s" %
                   (dev, unique_id, err))
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = ssconf.SimpleStore().GetFileStorageDir()
  # refuse any path that does not live under the cluster-wide base
  # NOTE(review): os.path.commonprefix is character-wise, so a sibling
  # such as base + "2" would pass this check -- confirm intent
  if (not os.path.commonprefix([file_storage_dir, base_file_storage_dir]) ==
      base_file_storage_dir):
    logger.Error("file storage directory '%s' is not under base file"
                 " storage directory '%s'" %
                 (file_storage_dir, base_file_storage_dir))
  return file_storage_dir
def CreateFileStorageDir(file_storage_dir):
  """Create file storage directory.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    creation was successful or not

  """
  # normalize and validate against the cluster-wide base directory
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)

  if not file_storage_dir:

  if os.path.exists(file_storage_dir):
    # path already present: only acceptable if it is a directory
    if not os.path.isdir(file_storage_dir):
      logger.Error("'%s' is not a directory" % file_storage_dir)

      # create all missing path components, mode rwxr-x---
      os.makedirs(file_storage_dir, 0750)
    except OSError, err:
      logger.Error("Cannot create file storage directory '%s': %s" %
                   (file_storage_dir, err))
def RemoveFileStorageDir(file_storage_dir):
  """Remove file storage directory.

  Remove it only if it's empty. If not log an error and return.

  Args:
    file_storage_dir: string containing the path

  Returns:
    tuple with first element a boolean indicating whether dir
    removal was successful or not

  """
  # normalize and validate against the cluster-wide base directory
  file_storage_dir = _TransformFileStorageDir(file_storage_dir)

  if not file_storage_dir:

  if os.path.exists(file_storage_dir):
    if not os.path.isdir(file_storage_dir):
      logger.Error("'%s' is not a directory" % file_storage_dir)
    # deletes dir only if empty, otherwise we want to return False

      os.rmdir(file_storage_dir)
    except OSError, err:
      logger.Error("Cannot remove file storage directory '%s': %s" %
                   (file_storage_dir, err))
def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
  """Rename the file storage directory.

  Args:
    old_file_storage_dir: string containing the old path
    new_file_storage_dir: string containing the new path

  Returns:
    tuple with first element a boolean indicating whether dir
    rename was successful or not

  """
  # both the old and the new path must validate against the
  # cluster-wide base file storage directory
  old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
  new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)

  if not old_file_storage_dir or not new_file_storage_dir:

  if not os.path.exists(new_file_storage_dir):
    if os.path.isdir(old_file_storage_dir):

        os.rename(old_file_storage_dir, new_file_storage_dir)
      except OSError, err:
        logger.Error("Cannot rename '%s' to '%s': %s"
                     % (old_file_storage_dir, new_file_storage_dir, err))

      logger.Error("'%s' is not a directory" % old_file_storage_dir)

    if os.path.exists(old_file_storage_dir):
      # FIXME: '%' binds tighter than ','; only old_file_storage_dir is
      # applied to the format string (which has two %s placeholders), so
      # this raises "not enough arguments for format string" at runtime
      # and passes new_file_storage_dir as a stray second argument to
      # logger.Error.  The operands should be wrapped in parentheses:
      #   % (old_file_storage_dir, new_file_storage_dir))
      logger.Error("Cannot rename '%s' to '%s'. Both locations exist." %
                   old_file_storage_dir, new_file_storage_dir)
class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  # only scripts whose names match this are considered valid hooks
  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")

  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    Args:
      - hooks_base_dir: if not None, this overrides the
        constants.HOOKS_BASE_DIR (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = constants.HOOKS_BASE_DIR
    self._BASE_DIR = hooks_base_dir

  def ExecHook(script, env):
    """Exec one hook script.

    Args:
      - script: the full path to the script
      - env: the environment with which to exec the script

    Returns:
      tuple of (success boolean, captured output)

    """
    # exec the process using subprocess and log the output
    fdstdin = open("/dev/null", "r")
    child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT, close_fds=True,
                             shell=False, cwd="/", env=env)
      # only the first 4 KiB of output are kept
      output = child.stdout.read(4096)
      child.stdout.close()
    except EnvironmentError, err:
      output += "Hook script error: %s" % str(err)

        result = child.wait()
    except EnvironmentError, err:
      # retry the wait if it was interrupted by a signal
      if err.errno == errno.EINTR:

    # try not to leak fds
    for fd in (fdstdin, ):
      except EnvironmentError, err:
        # just log the error
        #logger.Error("While closing fd %s: %s" % (fd, err))

    return result == 0, output

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    This method will not be usually overriden by child opcodes.

    """
    # the phase selects which per-hook subdirectory to scan
    if phase == constants.HOOKS_PHASE_PRE:
    elif phase == constants.HOOKS_PHASE_POST:
      raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
      dir_contents = utils.ListVisibleFiles(dir_name)
    except OSError, err:

    # we use the standard python sort order,
    # so 00name is the recommended naming scheme
    for relname in dir_contents:
      fname = os.path.join(dir_name, relname)
      # skip anything that is not an executable regular file with a
      # mask-conformant name
      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
              self.RE_MASK.match(relname) is not None):
        rrval = constants.HKR_SKIP

        result, output = self.ExecHook(fname, env)
        rrval = constants.HKR_FAIL
        rrval = constants.HKR_SUCCESS

      rr.append(("%s/%s" % (subdir, relname), rrval, output))
class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    Return value: tuple of:
      - run status (one of the IARUN_ constants)
      - stdout of the allocator script
      - stderr of the allocator script
      - fail reason (as from utils.RunResult)

    """
    # locate the allocator script on the configured search path
    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

    # the allocator input is passed via a temporary file, given as the
    # script's single argument
    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
      result = utils.RunCmd([alloc_script, fin_name])
        return (constants.IARUN_FAILURE, result.stdout, result.stderr,

    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
class DevCacheManager(object):
  """Simple class for managing a cache of block device information.

  """
  # prefix stripped from device paths when computing cache file names
  _DEV_PREFIX = "/dev/"
  # directory under which the per-device cache files live
  _ROOT_DIR = constants.BDEV_CACHE_DIR

  def _ConvertPath(cls, dev_path):
    """Converts a /dev/name path to the cache file name.

    This replaces slashes with underscores and strips the /dev
    prefix. It then returns the full path to the cache file.

    """
    if dev_path.startswith(cls._DEV_PREFIX):
      dev_path = dev_path[len(cls._DEV_PREFIX):]
    # flatten the remaining path into a single file name component
    dev_path = dev_path.replace("/", "_")
    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)

  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
    """Updates the cache information for a given device.

    Writes "<owner> <state> <iv_name>" for the device's cache file;
    errors are logged but not propagated (best-effort cache).

    """
    if dev_path is None:
      logger.Error("DevCacheManager.UpdateCache got a None dev_path")
    fpath = cls._ConvertPath(dev_path)
      # placeholder when the device has no instance-visible name
      iv_name = "not_visible"
    # NOTE(review): 'state' is presumably derived from on_primary in
    # lines not visible here -- confirm
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
      utils.WriteFile(fpath, data=fdata)
    except EnvironmentError, err:
      logger.Error("Can't update bdev cache for %s, error %s" %
                   (dev_path, str(err)))

  def RemoveCache(cls, dev_path):
    """Remove data for a dev_path.

    Errors are logged but not propagated (best-effort cache).

    """
    if dev_path is None:
      logger.Error("DevCacheManager.RemoveCache got a None dev_path")
    fpath = cls._ConvertPath(dev_path)
      utils.RemoveFile(fpath)
    except EnvironmentError, err:
      # FIXME: message says 'update' but this is the removal path
      logger.Error("Can't update bdev cache for %s, error %s" %
                   (dev_path, str(err)))