4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
36 from ganeti import logger
37 from ganeti import errors
38 from ganeti import utils
39 from ganeti import ssh
40 from ganeti import hypervisor
41 from ganeti import constants
42 from ganeti import bdev
43 from ganeti import objects
44 from ganeti import ssconf
47 # global cache for DrbdReconfigNet
49 # the cache is indexed by a tuple (instance_name, physical_id) of the
50 # disk, and contains as value the actual BlockDev instance (in our
51 # particular usage, only DRBD8 instances)
53 # the cache is cleared every time we restart DrbdReconfigNet with
54 # step=1, and it's checked for consistency (we have the same keys and
55 # size of the cache) when it's called with step!=1
  # NOTE(review): this excerpt is a sampled dump — the enclosing "def" lines
  # (StartMaster / StopMaster per the docstrings), the docstring terminators
  # and the "if result.failed:" guards around the logger.Error calls are not
  # visible here. Lines reproduced verbatim; both run MASTER_SCRIPT via
  # utils.RunCmd and log the command output on failure.
  """Activate local node as master node.
  There are two needed steps for this:
    - run the master script
    - register the cron script
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
    logger.Error("could not activate cluster interface with command %s,"
                 " error: '%s'" % (result.cmd, result.output))
  """Deactivate this node as master.
  This runs the master stop script.
  result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
    logger.Error("could not deactivate cluster interface with command %s,"
                 " error: '%s'" % (result.cmd, result.output))
# NOTE(review): interior lines are elided in this excerpt (the docstring
# terminator and the "try:" matching the "except errors.OpExecError" below).
# Visible behavior: writes the four SSH host keys with the given modes,
# installs the ganeti user's key pair, appends sshpub to authorized_keys,
# then restarts sshd via SSH_INITD_SCRIPT. Octal modes (0600/0644) are
# Python 2 syntax.
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
  """Joins this node to the cluster.
  This does the following:
    - updates the hostkeys of the machine (rsa and dsa)
    - adds the ssh private key to the user
    - adds the ssh public key to the users' authorized_keys file
  sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
               (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
               (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
               (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
  for name, content, mode in sshd_keys:
    utils.WriteFile(name, data=content, mode=mode)
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
  except errors.OpExecError, err:
    logger.Error("Error while processing user ssh files: %s" % err)
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
    utils.WriteFile(name, data=content, mode=0600)
  utils.AddAuthorizedKey(auth_keys, sshpub)
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
  # NOTE(review): the "def" line (LeaveCluster per the docstring) and several
  # interior lines (try/finally around the file read, f.close()) are elided
  # in this excerpt. Visible behavior: purges regular files from DATA_DIR,
  # removes this node's key from authorized_keys, then deletes the key pair.
  """Cleans up the current node and prepares it to be removed from the cluster.
  if os.path.isdir(constants.DATA_DIR):
    for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
      full_name = os.path.join(constants.DATA_DIR, rel_name)
      if os.path.isfile(full_name) and not os.path.islink(full_name):
        utils.RemoveFile(full_name)
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
  except errors.OpExecError, err:
    logger.Error("Error while processing ssh files: %s" % err)
  f = open(pub_key, 'r')
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
  utils.RemoveFile(priv_key)
  utils.RemoveFile(pub_key)
# NOTE(review): the initialisation of `outputarray`, the final return and
# the f.close() are elided in this excerpt. Visible behavior: merges
# _GetVGInfo results, hypervisor node info, and the kernel boot id into one
# dict.
def GetNodeInfo(vgname):
  """Gives back a hash with different informations about the node.
  { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx,
    'memory_free' : xxx, 'memory_total' : xxx }
  vg_size is the size of the configured volume group in MiB
  vg_free is the free size of the volume group in MiB
  memory_dom0 is the memory allocated for domain0 in MiB
  memory_free is the currently available (free) ram in MiB
  memory_total is the total number of ram in MiB
  vginfo = _GetVGInfo(vgname)
  outputarray['vg_size'] = vginfo['vg_size']
  outputarray['vg_free'] = vginfo['vg_free']
  hyper = hypervisor.GetHypervisor()
  hyp_info = hyper.GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)
  f = open("/proc/sys/kernel/random/boot_id", 'r')
  outputarray["bootid"] = f.read(128).rstrip("\n")
# NOTE(review): several interior lines are elided (result dict init, the
# `if not success:` guard, the fail=[] init, the my_pip/my_sip assignment
# branch, the return). Visible behavior: drives checks selected by keys of
# `what` — hypervisor verify, file fingerprints, per-node hostname checks,
# and TCP pings of each node's primary/secondary IPs on the node daemon port.
def VerifyNode(what):
  """Verify the status of the local node.
  what - a dictionary of things to check:
    'filelist' : list of files for which to compute checksums
    'nodelist' : list of nodes we should check communication with
    'hypervisor': run the hypervisor-specific verify
  Requested files on local node are checksummed and the result returned.
  The nodelist is traversed, with the following checks being made
  - known_hosts key correct
  - correct resolving of node name (target node returns its own hostname
    by ssh-execution of 'hostname', result compared against name in list.
  if 'hypervisor' in what:
    result['hypervisor'] = hypervisor.GetHypervisor().Verify()
  if 'filelist' in what:
    result['filelist'] = utils.FingerprintFiles(what['filelist'])
  if 'nodelist' in what:
    result['nodelist'] = {}
    random.shuffle(what['nodelist'])
    for node in what['nodelist']:
      success, message = ssh.VerifyNodeHostname(node)
        result['nodelist'][node] = message
  if 'node-net-test' in what:
    result['node-net-test'] = {}
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
    for name, pip, sip in what['node-net-test']:
      result['node-net-test'][my_name] = ("Can't find my own"
                                          " primary/secondary IP"
    port = ssconf.SimpleStore().GetNodeDaemonPort()
    for name, pip, sip in what['node-net-test']:
      if not utils.TcpPing(pip, port, source=my_pip):
        fail.append("primary")
      if not utils.TcpPing(sip, port, source=my_sip):
        fail.append("secondary")
      result['node-net-test'][name] = ("failure using the %s"
# NOTE(review): the assignment of `sep`, the `lvs = {}` init, the
# `if result.failed:` guard and the return are elided in this excerpt.
# Visible behavior: parses `lvs` output into {name: (size, inactive, online)}
# using lv_attr chars 4 ('-' => inactive) and 5 ('o' => open).
def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.
  dictionary of all partions (key) with their size (in MiB), inactive
  {'test1': ('20.06', True, True)}
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
    logger.Error("Failed to list logical volumes, lvs output: %s" %
  for line in result.stdout.splitlines():
    line = line.strip().rstrip(sep)
    if line.count(sep) != 2:
      logger.Error("Invalid line returned from lvs output: '%s'" % line)
    name, size, attr = line.split(sep)
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)
def ListVolumeGroups():
  """Return the volume groups present on this node.

  Returns:
    Dictionary with the volume group names as keys and their sizes
    as values.

  """
  # Pure delegation: the actual vgs invocation lives in the utils layer.
  return utils.ListVolumeGroups()
  # NOTE(review): the "def" line of this function, the `if result.failed:`
  # guard and the `def map_line(line):` wrapper around the dict literal are
  # elided in this excerpt. Visible behavior: parses '|'-separated lvs
  # output into dicts with name/size/dev/vg keys; parse_dev strips the
  # "(extent)" suffix from the devices column.
  """List all volumes on this node.
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--options=lv_name,lv_size,devices,vg_name"])
    logger.Error("Failed to list logical volumes, lvs output: %s" %
    return dev.split('(')[0]
      'name': line[0].strip(),
      'size': line[1].strip(),
      'dev': parse_dev(line[2].strip()),
      'vg': line[3].strip(),
  return [map_line(line.split('|')) for line in result.stdout.splitlines()
          if line.count('|') >= 3]
# NOTE(review): the return statements of BridgesExist and the try:/return of
# GetInstanceList are elided in this excerpt; lines reproduced verbatim.
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.
  True if all of them exist, false otherwise
  for bridge in bridges_list:
    if not utils.BridgeExists(bridge):
def GetInstanceList():
  """Provides a list of instances.
  A list of all running instances on the current node
  - instance1.example.com
  - instance2.example.com
    names = hypervisor.GetHypervisor().ListInstances()
  except errors.HypervisorError, err:
    logger.Error("error enumerating instances: %s" % str(err))
# NOTE(review): the `output = {}` init and the returns (of `output`, and of
# GetInstanceMigratable's final success tuple) are elided in this excerpt.
# GetInstanceMigratable visibly checks that the instance is running and that
# every disk has its /var/.../disk-links symlink (created since 1.2.5).
def GetInstanceInfo(instance):
  """Gives back the informations about an instance as a dictionary.
  instance: name of the instance (ex. instance1.example.com)
  { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
  memory: memory size of instance (int)
  state: xen state of instance (string)
  time: cpu time of instance (float)
  iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
  if iinfo is not None:
    output['memory'] = iinfo[2]
    output['state'] = iinfo[4]
    output['time'] = iinfo[5]
def GetInstanceMigratable(instance):
  """Gives whether an instance can be migrated.
  instance - object representing the instance to be checked.
  (result, description)
  result - whether the instance can be migrated or not
  description - a description of the issue, if relevant
  hyper = hypervisor.GetHypervisor()
  if instance.name not in hyper.ListInstances():
    return (False, 'not running')
  for disk in instance.disks:
    link_name = _GetBlockDevSymlinkPath(instance.name, disk.iv_name)
    if not os.path.islink(link_name):
      return (False, 'not restarted since ganeti 1.2.5')
# NOTE(review): the output dict init, the loop body building per-instance
# dicts and the return are elided in this excerpt; lines reproduced verbatim.
def GetAllInstancesInfo():
  """Gather data about all instances.
  This is the equivalent of `GetInstanceInfo()`, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.
  Returns: a dictionary of dictionaries, keys being the instance name,
  { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
  memory: memory size of instance (int)
  state: xen state of instance (string)
  time: cpu time of instance (float)
  vcpus: the number of cpus
  iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
  for name, inst_id, memory, vcpus, state, times in iinfo:
# NOTE(review): "return False" lines after the logger.Error calls, string
# continuations of the raise statements, the logfile argument of
# BuildShellCmd and the success/failure returns are elided in this excerpt.
# Also note: RunRenameInstance's error log says "os create command" where
# "os rename command" is clearly meant — pre-existing wording bug, left
# untouched here (runtime string).
def AddOSToInstance(instance, os_disk, swap_disk):
  """Add an OS to an instance.
  instance: the instance object
  os_disk: the instance-visible name of the os device
  swap_disk: the instance-visible name of the swap device
  inst_os = OSFromDisk(instance.os)
  create_script = inst_os.create_script
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)
  command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
  result = utils.RunCmd(command)
    logger.Error("os create command '%s' returned error: %s, logfile: %s,"
                 (command, result.fail_reason, logfile, result.output))
def RunRenameInstance(instance, old_name, os_disk, swap_disk):
  """Run the OS rename script for an instance.
  instance: the instance object
  old_name: the old name of the instance
  os_disk: the instance-visible name of the os device
  swap_disk: the instance-visible name of the swap device
  inst_os = OSFromDisk(instance.os)
  script = inst_os.rename_script
  os_device = instance.FindDisk(os_disk)
  if os_device is None:
    logger.Error("Can't find this device-visible name '%s'" % os_disk)
  swap_device = instance.FindDisk(swap_disk)
  if swap_device is None:
    logger.Error("Can't find this device-visible name '%s'" % swap_disk)
  real_os_dev = _RecursiveFindBD(os_device)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  real_swap_dev = _RecursiveFindBD(swap_device)
  if real_swap_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                           instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)
  command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
                                inst_os.path, script, old_name, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
  result = utils.RunCmd(command)
    logger.Error("os create command '%s' returned error: %s"
                 (command, result.fail_reason, result.output))
# NOTE(review): the `if retval.failed:` branch, the try:, the length check on
# valarr, the `retdic = {` opener and the final return are elided in this
# excerpt. Visible behavior: runs vgs with ':'-separated output and parses
# size/free/pv_count, logging (not raising) on parse failures.
def _GetVGInfo(vg_name):
  """Get informations about the volume group.
  vg_name: the volume group
  { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
  vg_size is the total size of the volume group in MiB
  vg_free is the free size of the volume group in MiB
  pv_count are the number of physical disks in that vg
  If an error occurs during gathering of data, we return the same dict
  with keys all set to None.
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])
    errmsg = "volume group %s not present" % vg_name
  valarr = retval.stdout.strip().rstrip(':').split(':')
      "vg_size": int(round(float(valarr[0]), 0)),
      "vg_free": int(round(float(valarr[1]), 0)),
      "pv_count": int(valarr[2]),
  except ValueError, err:
    logger.Error("Fail to parse vgs output: %s" % str(err))
    logger.Error("vgs output has the wrong number of fields (expected"
                 " three): %s" % str(valarr))
def _GetBlockDevSymlinkPath(instance_name, device_name):
  """Compute the path of the symlink for an instance's disk.

  The link lives inside constants.DISK_LINKS_DIR and is named
  "<instance_name>:<device_name>".

  """
  link_basename = "%s:%s" % (instance_name, device_name)
  return os.path.join(constants.DISK_LINKS_DIR, link_basename)
# NOTE(review): the try:/except OSError structure of _SymlinkBlockDev, the
# os.remove before re-linking, the loop header and try: of
# _RemoveBlockDevLinks, and the `block_devices = []` init / device-None check
# / return of _GatherAndLinkBlockDevs are elided in this excerpt.
def _SymlinkBlockDev(instance_name, device_path, device_name):
  """Set up symlinks to a instance's block device.
  This is an auxiliary function run when an instance is start (on the primary
  node) or when an instance is migrated (on the target node).
  instance_name: the name of the target instance
  device_path: path of the physical block device, on the node
  device_name: 'virtual' name of the device
  absolute path to the disk's symlink
  link_name = _GetBlockDevSymlinkPath(instance_name, device_name)
    os.symlink(device_path, link_name)
    if err.errno == errno.EEXIST:
      if (not os.path.islink(link_name) or
          os.readlink(link_name) != device_path):
        os.symlink(device_path, link_name)
def _RemoveBlockDevLinks(instance_name, disks):
  """Remove the block device symlinks belonging to the given instance.
    link_name = _GetBlockDevSymlinkPath(instance_name, disk.iv_name)
    if os.path.islink(link_name):
      logger.Error("Can't remove symlink '%s': %s" % (link_name, err))
def _GatherAndLinkBlockDevs(instance):
  """Set up an instance's block device(s).
  This is run on the primary node at instance startup. The block
  devices must be already assembled.
  A list of (block_device, disk_name) tuples.
  for disk in instance.disks:
    device = _RecursiveFindBD(disk)
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
    link_name = _SymlinkBlockDev(instance.name, device.dev_path,
      raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
    block_devices.append((link_name, disk.iv_name))
# NOTE(review): try: lines, early returns and the sleep inside the 11x
# polling loop are elided in this excerpt. BUG (pre-existing, visible at the
# "even by destroy" line): the format string contains '%s' but no `%`
# argument is applied — should be `% instance.name`; left untouched here
# since a doc-only edit cannot change runtime strings.
def StartInstance(instance, extra_args):
  """Start an instance.
  instance - object representing the instance to be started.
  running_instances = GetInstanceList()
  if instance.name in running_instances:
    block_devices = _GatherAndLinkBlockDevs(instance)
    hyper = hypervisor.GetHypervisor()
    hyper.StartInstance(instance, block_devices, extra_args)
  except errors.BlockDeviceError, err:
    logger.Error("Failed to start instance: %s" % err)
  except errors.HypervisorError, err:
    logger.Error("Failed to start instance: %s" % err)
    _RemoveBlockDevLinks(instance.name, instance.disks)
def ShutdownInstance(instance):
  """Shut an instance down.
  instance - object representing the instance to be shutdown.
  running_instances = GetInstanceList()
  if instance.name not in running_instances:
  hyper = hypervisor.GetHypervisor()
    hyper.StopInstance(instance)
  except errors.HypervisorError, err:
    logger.Error("Failed to stop instance: %s" % err)
  # test every 10secs for 2min
  for dummy in range(11):
    if instance.name not in GetInstanceList():
  # the shutdown did not succeed
  logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
    hyper.StopInstance(instance, force=True)
  except errors.HypervisorError, err:
    logger.Error("Failed to stop instance: %s" % err)
  if instance.name in GetInstanceList():
    logger.Error("could not shutdown instance '%s' even by destroy")
  _RemoveBlockDevLinks(instance.name, instance.disks)
# NOTE(review): try: lines, early returns, the else: before the
# ParameterError raise, and MigrateInstance's failure-return branch are
# elided in this excerpt. Also note the typo "successfull" in the returned
# message — a runtime string, so left byte-identical in this doc-only edit.
def RebootInstance(instance, reboot_type, extra_args):
  """Reboot an instance.
  instance - object representing the instance to be reboot.
  reboot_type - how to reboot [soft,hard,full]
  running_instances = GetInstanceList()
  if instance.name not in running_instances:
    logger.Error("Cannot reboot instance that is not running")
  hyper = hypervisor.GetHypervisor()
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
      hyper.RebootInstance(instance)
    except errors.HypervisorError, err:
      logger.Error("Failed to soft reboot instance: %s" % err)
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
      ShutdownInstance(instance)
      StartInstance(instance, extra_args)
    except errors.HypervisorError, err:
      logger.Error("Failed to hard reboot instance: %s" % err)
    raise errors.ParameterError("reboot_type invalid")
def MigrateInstance(instance, target, live):
  """Migrates an instance to another node.
  instance - name of the instance to be migrated.
  target - node to send the instance to.
  live - whether to perform a live migration.
  hyper = hypervisor.GetHypervisor()
    hyper.MigrateInstance(instance, target, live)
  except errors.HypervisorError, err:
  return (True, "Migration successfull")
# NOTE(review): large portions are elided in this excerpt — the clist init
# and child handling, device.Remove()/try:, the Create() continuation
# arguments, the returns, and in RemoveBlockDevice the None-check on rdev
# and the `if result:` guard. Lines reproduced verbatim.
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.
  disk: a ganeti.objects.Disk object
  size: the size of the physical underlying device
  owner: a string with the name of the instance
  on_primary: a boolean indicating if it is the primary node or not
  info: string that will be sent to the physical device creation
  the new unique_id of the device (this can sometime be
  computed only after creation), or None. On secondary nodes,
  it's not required to return anything.
    for child in disk.children:
      crdev = _RecursiveAssembleBD(child, owner, on_primary)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
        if device is not None:
          logger.Info("removing existing device %s" % disk)
        except errors.BlockDeviceError, err:
  device = bdev.Create(disk.dev_type, disk.physical_id,
    raise ValueError("Can't create child device for %s, %s" %
  if on_primary or disk.AssembleOnSecondary():
    if not device.Assemble():
      errorstring = "Can't assemble device after creation"
      logger.Error(errorstring)
      raise errors.BlockDeviceError("%s, very unusual event - check the node"
                                    " daemon logs" % errorstring)
    device.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      device.Open(force=True)
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)
  physical_id = device.unique_id
def RemoveBlockDevice(disk):
  """Remove a block device.
  This is intended to be called recursively.
    # since we are removing the device, allow a partial match
    # this allows removal of broken mirrors
    rdev = _RecursiveFindBD(disk, allow_partial=True)
  except errors.BlockDeviceError, err:
    # probably can't attach
    logger.Info("Can't attach to device %s in remove" % disk)
  r_path = rdev.dev_path
  result = rdev.Remove()
    DevCacheManager.RemoveCache(r_path)
  for child in disk.children:
    result = result and RemoveBlockDevice(child)
# NOTE(review): interior lines elided — the `children = []` init and the
# if/else ladder selecting `mcn`, try: lines, the raise when too many
# children fail, r_dev.Open(), the returns of all three functions, and the
# `if result:` guard in ShutdownBlockDevice. ("oppossed" typo is inside an
# unterminated docstring in this view, so left as-is.)
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.
  This is run on the primary and secondary nodes for an instance.
  This function is called recursively.
  disk: a objects.Disk object
  as_primary: if we should make the block device read/write
  the assembled device or None (in case no device was assembled)
  If the assembly is not successful, an exception is raised.
      mcn = disk.ChildrenNeeded()
      mcn = 0  # max number of Nones allowed
      mcn = len(disk.children) - mcn  # max number of Nones
    for chld_disk in disk.children:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        if children.count(None) >= mcn:
        logger.Debug("Error in child activation: %s" % str(err))
      children.append(cdev)
  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
    r_dev.SetSyncSpeed(constants.SYNC_SPEED)
    if as_primary or disk.OpenOnSecondary():
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.
  This is a wrapper over _RecursiveAssembleBD.
  a /dev path for primary nodes
  True for secondary nodes
  result = _RecursiveAssembleBD(disk, owner, as_primary)
  if isinstance(result, bdev.BlockDev):
    result = result.dev_path
def ShutdownBlockDevice(disk):
  """Shut down a block device.
  First, if the device is assembled (can `Attach()`), then the device
  is shutdown. Then the children of the device are shutdown.
  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    r_path = r_dev.dev_path
    result = r_dev.Shutdown()
      DevCacheManager.RemoveCache(r_path)
  for child in disk.children:
    result = result and ShutdownBlockDevice(child)
# NOTE(review): the early `return False` lines after the logger.Error calls,
# the `devs = []` / `stats = []` inits, the StaticDevPath branch, the
# `for dsk in disks:` header and the returns are elided in this excerpt.
def MirrorAddChildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.
  parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
  if parent_bdev is None:
    logger.Error("Can't find parent device")
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if new_bdevs.count(None) > 0:
    logger.Error("Can't find new device(s) to add: %s:%s" %
                 (new_bdevs, new_cdevs))
  parent_bdev.AddChildren(new_bdevs)
def MirrorRemoveChildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
  for disk in new_cdevs:
    rpath = disk.StaticDevPath()
      bd = _RecursiveFindBD(disk)
        logger.Error("Can't find dynamic device %s while removing children" %
      devs.append(bd.dev_path)
  parent_bdev.RemoveChildren(devs)
def GetMirrorStatus(disks):
  """Get the mirroring status of a list of devices.
  disks: list of `objects.Disk`
  list of (mirror_done, estimated_time) tuples, which
  are the result of bdev.BlockDevice.CombinedSyncStatus()
    rbd = _RecursiveFindBD(dsk)
      raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
    stats.append(rbd.CombinedSyncStatus())
# NOTE(review): the `children = []` init, the allow_partial handling and
# FindBlockDevice's None-check/early-return on rbd are elided in this
# excerpt; lines reproduced verbatim.
def _RecursiveFindBD(disk, allow_partial=False):
  """Check if a device is activated.
  If so, return informations about the real device.
  disk: the objects.Disk instance
  allow_partial: don't abort the find if a child of the
                 device can't be found; this is intended to be
                 used when repairing mirrors
  None if the device can't be found
  otherwise the device instance
  for chdisk in disk.children:
    children.append(_RecursiveFindBD(chdisk))
  return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
def FindBlockDevice(disk):
  """Check if a device is activated.
  If so, return informations about the real device.
  disk: the objects.Disk instance
  None if the device can't be found
  (device_path, major, minor, sync_percent, estimated_time, is_degraded)
  rbd = _RecursiveFindBD(disk)
  return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
# NOTE(review): the `allowed_files = [` opener/closer, the try:/except
# around the temp-file rename, os.write/os.close of fd, the returns, and
# _ErrnoOrStr's else-branch/return are elided in this excerpt. Visible
# behavior: writes via mkstemp in the same directory, then atomically
# renames over the target; temp file is removed on failure.
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.
  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.
  if not os.path.isabs(file_name):
    logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
  allowed_files.extend(ssconf.SimpleStore().GetFileList())
  if file_name not in allowed_files:
    logger.Error("Filename passed to UploadFile not in allowed"
                 " upload targets: '%s'" % file_name)
  dir_name, small_name = os.path.split(file_name)
  fd, new_name = tempfile.mkstemp('.new', small_name, dir_name)
  # here we need to make sure we remove the temp file, if any error
  # leaves it in place
    os.chown(new_name, uid, gid)
    os.chmod(new_name, mode)
    os.utime(new_name, (atime, mtime))
    os.rename(new_name, file_name)
    utils.RemoveFile(new_name)
def _ErrnoOrStr(err):
  """Format an EnvironmentError exception.
  If the `err` argument has an errno attribute, it will be looked up
  and converted into a textual EXXXX description. Otherwise the string
  representation of the error will be returned.
  if hasattr(err, 'errno'):
    detail = errno.errorcode[err.errno]
# NOTE(review): try: lines, the open()/close of the api file, string
# continuations and the final return are elided in this excerpt. Visible
# behavior: validates that "ganeti_api_version" exists, is a regular file,
# and contains integer version numbers, raising errors.InvalidOS otherwise.
def _OSOndiskVersion(name, os_dir):
  """Compute and return the API version of a given OS.
  This function will try to read the API version of the os given by
  the 'name' parameter and residing in the 'os_dir' directory.
  Return value will be either an integer denoting the version or None in the
  case when this is not a valid OS name.
  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
    st = os.stat(api_file)
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
                           " found (%s)" % _ErrnoOrStr(err))
  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
      api_versions = f.readlines()
  except EnvironmentError, err:
    raise errors.InvalidOS(name, os_dir, "error while reading the"
                           " API version (%s)" % _ErrnoOrStr(err))
  api_versions = [version.strip() for version in api_versions]
    api_versions = [int(version) for version in api_versions]
  except (TypeError, ValueError), err:
    raise errors.InvalidOS(name, os_dir,
                           "API version is not integer (%s)" % str(err))
# NOTE(review): the `result = []` init, try: lines, continue statements and
# the final return are elided in this excerpt. Visible behavior: scans each
# top directory, building an OS object per entry, mapping InvalidOS errors
# to objects.OS.FromInvalidOS entries instead of aborting.
def DiagnoseOS(top_dirs=None):
  """Compute the validity for all OSes.
  Returns an OS object for each name in all the given top directories
  (if not given defaults to constants.OS_SEARCH_PATH)
  if top_dirs is None:
    top_dirs = constants.OS_SEARCH_PATH
  for dir_name in top_dirs:
    if os.path.isdir(dir_name):
        f_names = utils.ListVisibleFiles(dir_name)
      except EnvironmentError, err:
        logger.Error("Can't list the OS directory %s: %s" %
                     (dir_name, str(err)))
      for name in f_names:
          os_inst = OSFromDisk(name, base_dir=dir_name)
          result.append(os_inst)
        except errors.InvalidOS, err:
          result.append(objects.OS.FromInvalidOS(err))
# NOTE(review): the `if os_dir is None:` guard, the else: branch, try: lines
# and string continuations of two raise statements are elided in this
# excerpt. Visible behavior: locates the OS dir, verifies the API version,
# then checks each of the four scripts exists, is regular and executable,
# finally building an objects.OS.
def OSFromDisk(name, base_dir=None):
  """Create an OS instance from disk.
  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  `errors.InvalidOS` exception, detailing why this is not a valid
  os_dir: Directory containing the OS scripts. Defaults to a search
          in all the OS_SEARCH_PATH directories.
  if base_dir is None:
    os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
      raise errors.InvalidOS(name, None, "OS dir not found in search path")
    os_dir = os.path.sep.join([base_dir, name])
  api_versions = _OSOndiskVersion(name, os_dir)
  if constants.OS_API_VERSION not in api_versions:
    raise errors.InvalidOS(name, os_dir, "API version mismatch"
                           " (found %s want %s)"
                           % (api_versions, constants.OS_API_VERSION))
  # OS Scripts dictionary, we will populate it with the actual script names
  os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
  for script in os_scripts:
    os_scripts[script] = os.path.sep.join([os_dir, script])
      st = os.stat(os_scripts[script])
    except EnvironmentError, err:
      raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
                             (script, _ErrnoOrStr(err)))
    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
      raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
  return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
                    create_script=os_scripts['create'],
                    export_script=os_scripts['export'],
                    import_script=os_scripts['import'],
                    rename_script=os_scripts['rename'],
                    api_versions=api_versions)
# NOTE(review): the `if r_dev is None:` guard, the try:/Grow call and the
# success return of GrowBlockDevice, plus the `if disk.children:` opener of
# SnapshotBlockDevice's branch ladder, are elided in this excerpt.
def GrowBlockDevice(disk, amount):
  """Grow a stack of block devices.
  This function is called recursively, with the childrens being the
  disk: the disk to be grown
  Returns: a tuple of (status, result), with:
    status: the result (true/false) of the operation
    result: the error message if the operation failed, otherwise not used
  r_dev = _RecursiveFindBD(disk)
    return False, "Cannot find block device %s" % (disk,)
  except errors.BlockDeviceError, err:
    return False, str(err)
def SnapshotBlockDevice(disk):
  """Create a snapshot copy of a block device.
  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.
  disk: the disk to be snapshotted
  a config entry for the actual lvm device snapshotted.
    if len(disk.children) == 1:
      # only one child, let's recurse on it
      return SnapshotBlockDevice(disk.children[0])
    # more than one child, choose one that matches
    for child in disk.children:
      if child.size == disk.size:
        # return implies breaking the loop
        return SnapshotBlockDevice(child)
  elif disk.dev_type == constants.LD_LV:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
  raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
                               " '%s' of type '%s'" %
                               (disk.unique_id, disk.dev_type))
# NOTE(review): the raise continuation, the `comprcmd` assignment (referenced
# by the '|'.join but never assigned in the visible lines), the
# `if result.failed:` guard and the returns are elided in this excerpt.
# Visible behavior: pipes the OS export script through a compressor into an
# ssh command that writes the image under EXPORT_DIR on the destination node.
def ExportSnapshot(disk, dest_node, instance):
  """Export a block device snapshot to a remote node.
  disk: the snapshot block device
  dest_node: the node to send the image to
  instance: instance being exported
  True if successful, False otherwise.
  inst_os = OSFromDisk(instance.os)
  export_script = inst_os.export_script
  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
                                     instance.name, int(time.time()))
  if not os.path.exists(constants.LOG_OS_DIR):
    os.mkdir(constants.LOG_OS_DIR, 0750)
  real_os_dev = _RecursiveFindBD(disk)
  if real_os_dev is None:
    raise errors.BlockDeviceError("Block device '%s' is not set up" %
  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
  destfile = disk.physical_id[1]
  # the target command is built out of three individual commands,
  # which are joined by pipes; we check each individual command for
  expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
                               export_script, instance.name,
                               real_os_dev.dev_path, logfile)
  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
                                destdir, destdir, destfile)
  remotecmd = ssh.BuildSSHCmd(dest_node, constants.GANETI_RUNAS, destcmd)
  # all commands have been checked, so we're safe to combine them
  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
  result = utils.RunCmd(command)
    logger.Error("os snapshot export command '%s' returned error: %s"
                 (command, result.fail_reason, result.output))
# Write the export's metadata (export.conf) into the staging directory and
# atomically promote "<name>.new" to the final export directory.
# NOTE(review): several lines are elided in this extract (e.g. the config
# write/close and the return statement); kept byte-identical.
1379 def FinalizeExport(instance, snap_disks):
1380   """Write out the export configuration information.
1383     instance: instance configuration
1384     snap_disks: snapshot block devices
1387     False in case of error, True otherwise.
1390   destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1391   finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
  # INI-style config; the two section names come from constants.
1393   config = objects.SerializableConfigParser()
1395   config.add_section(constants.INISECT_EXP)
1396   config.set(constants.INISECT_EXP, 'version', '0')
1397   config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
1398   config.set(constants.INISECT_EXP, 'source', instance.primary_node)
1399   config.set(constants.INISECT_EXP, 'os', instance.os)
1400   config.set(constants.INISECT_EXP, 'compression', 'gzip')
1402   config.add_section(constants.INISECT_INS)
1403   config.set(constants.INISECT_INS, 'name', instance.name)
1404   config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
1405   config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
1406   config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
1407   for nic_count, nic in enumerate(instance.nics):
1408     config.set(constants.INISECT_INS, 'nic%d_mac' %
1409                nic_count, '%s' % nic.mac)
1410     config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
1411     config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count,
1413   # TODO: redundant: on load can read nics until it doesn't exist
  # NOTE(review): 'nic_count' (and 'disk_count' below) record the LAST loop
  # index, not the number of items -- the importer presumably compensates;
  # confirm against the loading code before changing.
1414   config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)
1416   for disk_count, disk in enumerate(snap_disks):
1417     config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
1418                ('%s' % disk.iv_name))
1419     config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
1420                ('%s' % disk.physical_id[1]))
1421     config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
1423   config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)
1425   cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
1426   cfo = open(cff, 'w')
  # Replace any previous export of the same instance (ignore_errors=True),
  # then promote the staging directory.
1432   shutil.rmtree(finaldestdir, True)
1433   shutil.move(destdir, finaldestdir)
# Load and sanity-check the export.conf of an existing export directory.
# NOTE(review): the config.read() call and the return lines are elided in
# this extract; kept byte-identical.
1438 def ExportInfo(dest):
1439   """Get export configuration information.
1442     dest: directory containing the export
1445     A serializable config file containing the export info.
1448   cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
1450   config = objects.SerializableConfigParser()
  # Both the export and instance sections must exist for a valid export.
1453   if (not config.has_section(constants.INISECT_EXP) or
1454       not config.has_section(constants.INISECT_INS)):
# Restore an exported image onto an instance's disks: ssh-cat from the source
# node | decompressor | OS import script writing to the local block devices.
# NOTE(review): several lines are elided in this extract (e.g. the 'comprcmd'
# assignment used below and early returns after the logger.Error calls);
# kept byte-identical.
1460 def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1461   """Import an os image into an instance.
1464     instance: the instance object
1465     os_disk: the instance-visible name of the os device
1466     swap_disk: the instance-visible name of the swap device
1467     src_node: node holding the source image
1468     src_image: path to the source image on src_node
1471     False in case of error, True otherwise.
1474   inst_os = OSFromDisk(instance.os)
1475   import_script = inst_os.import_script
  # Translate instance-visible device names into Disk objects.
1477   os_device = instance.FindDisk(os_disk)
1478   if os_device is None:
1479     logger.Error("Can't find this device-visible name '%s'" % os_disk)
1482   swap_device = instance.FindDisk(swap_disk)
1483   if swap_device is None:
1484     logger.Error("Can't find this device-visible name '%s'" % swap_disk)
  # Both devices must be assembled locally before the import script runs.
1487   real_os_dev = _RecursiveFindBD(os_device)
1488   if real_os_dev is None:
1489     raise errors.BlockDeviceError("Block device '%s' is not set up" %
1493   real_swap_dev = _RecursiveFindBD(swap_device)
1494   if real_swap_dev is None:
1495     raise errors.BlockDeviceError("Block device '%s' is not set up" %
1497   real_swap_dev.Open()
1499   logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1500                                         instance.name, int(time.time()))
1501   if not os.path.exists(constants.LOG_OS_DIR):
1502     os.mkdir(constants.LOG_OS_DIR, 0750)
  # Remote half: stream the dump file from the source node over ssh.
1504   destcmd = utils.BuildShellCmd('cat %s', src_image)
1505   remotecmd = ssh.BuildSSHCmd(src_node, constants.GANETI_RUNAS, destcmd)
  # Local half: run the OS import script; '&>' sends stdout+stderr to logfile
  # (bash-ism -- relies on the shell RunCmd uses).
1508   impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1509                                inst_os.path, import_script, instance.name,
1510                                real_os_dev.dev_path, real_swap_dev.dev_path,
1513   command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1515   result = utils.RunCmd(command)
1518     logger.Error("os import command '%s' returned error: %s"
1520                  (command, result.fail_reason, result.output))
# NOTE(review): the enclosing 'def' line is elided in this extract; from the
# docstring this is the exports-listing helper (presumably ListExports).
1527   """Return a list of exports currently available on this machine.
  # Exports live as subdirectories of EXPORT_DIR; only visible entries count.
1530   if os.path.isdir(constants.EXPORT_DIR):
1531     return utils.ListVisibleFiles(constants.EXPORT_DIR)
# Delete an export directory tree under EXPORT_DIR.
# NOTE(review): the try: header and return lines are elided in this extract;
# kept byte-identical.
1536 def RemoveExport(export):
1537   """Remove an existing export from the node.
1540     export: the name of the export to remove
1543     False in case of error, True otherwise.
  # NOTE(review): 'export' is joined without validation -- a path-traversal
  # value would escape EXPORT_DIR; callers presumably pass trusted names.
1546   target = os.path.join(constants.EXPORT_DIR, export)
1548   shutil.rmtree(target)
1549   # TODO: catch some of the relevant exceptions and provide a pretty
1550   # error message if rmtree fails.
# Rename each device in devlist to its new unique_id, invalidating the
# dev-cache entry when the device path changes.
# NOTE(review): interior lines are elided in this extract (e.g. the result
# accumulator, the dev None-check, the try: header and the final return);
# kept byte-identical.
1555 def RenameBlockDevices(devlist):
1556   """Rename a list of block devices.
1558   The devlist argument is a list of tuples (disk, new_logical,
1559   new_physical). The return value will be a combined boolean result
1560   (True only if all renames succeeded).
1564   for disk, unique_id in devlist:
1565     dev = _RecursiveFindBD(disk)
  # A rename may move the device node; if so, the old cache file is stale.
1570       old_rpath = dev.dev_path
1571       dev.Rename(unique_id)
1572       new_rpath = dev.dev_path
1573       if old_rpath != new_rpath:
1574         DevCacheManager.RemoveCache(old_rpath)
1575         # FIXME: we should add the new cache information here, like:
1576         # DevCacheManager.UpdateCache(new_rpath, owner, ...)
1577         # but we don't have the owner here - maybe parse from existing
1578         # cache? for now, we only lose lvm data when we rename, which
1579         # is less critical than DRBD or MD
  # Per-device failures are logged and folded into the combined result
  # rather than aborting the remaining renames.
1580     except errors.BlockDeviceError, err:
1581       logger.Error("Can't rename device '%s' to '%s': %s" %
1582                    (dev, unique_id, err))
# Switch the given devices to 'closed' (secondary for DRBD) state, collecting
# per-device errors, and clean up instance symlinks on success.
# NOTE(review): interior lines are elided in this extract (loop header, msg
# initialisation, the Close() call and branch headers); kept byte-identical.
1587 def CloseBlockDevices(instance_name, disks):
1588   """Closes the given block devices.
1590   This means they will be switched to secondary mode (in case of DRBD).
1593     - instance_name: if the argument is not false, then the symlinks
1594                      belonging to the instance are removed, in order
1595                      to keep only the 'active' devices symlinked
1596     - disks: a list of objects.Disk instances
1601     rd = _RecursiveFindBD(cf)
1603       return (False, "Can't find device %s" % cf)
  # Errors are accumulated per device so one failure doesn't hide the rest.
1610     except errors.BlockDeviceError, err:
1611       msg.append(str(err))
1613     _RemoveBlockDevLinks(instance_name, disks)
1615     return (False, "Can't make devices secondary: %s" % ",".join(msg))
1617   return (True, "All devices secondary")
# Multi-step DRBD network reconfiguration driven by the master during disk
# replacement/migration. Step 1 populates the module-level _D8_RECONF_CACHE
# (see the comment block at the top of the file); later steps reuse it and
# verify it still matches the disks they were passed.
# NOTE(review): many interior lines are elided in this extract (loop headers,
# try: lines, cache-clear and primary-switch statements); kept byte-identical.
1620 def DrbdReconfigNet(instance_name, disks, nodes_ip, multimaster, step):
1621   """Tune the network settings on a list of drbd devices.
1623   The 'step' argument has three possible values:
1624     - step 1, init the operation and identify the disks
1625     - step 2, disconnect the network
1626     - step 3, reconnect with correct settings and (if needed) set primary
1627     - step 4, change disks into read-only (secondary) mode
1628     - step 5, wait until devices are synchronized
1631   # local short name for the cache
1632   cache = _D8_RECONF_CACHE
1634   # set the correct physical ID so that we can use it in the cache
1635   my_name = utils.HostInfo().name
1637     cf.SetPhysicalID(my_name, nodes_ip)
1641   # note: the cache keys do not contain the multimaster setting, as we
1642   # want to reuse the cache between the to-master, (in the meantime
1643   # failover), to-secondary calls
1645   if step == constants.DRBD_RECONF_RPC_INIT:
1646     # we clear the cache
  # First step: locate each device and remember it under (instance, phys_id).
1649       key = (instance_name, cf.physical_id)
1650       rd = _RecursiveFindBD(cf)
1652         return (False, "Can't find device %s" % cf)
1656     # we check that the cached items are exactly what we have been passed
1658       key = (instance_name, cf.physical_id)
1659       if key not in cache:
1660         return (False, "ReconfigCache has wrong contents - missing %s" % key)
1661       bdevs.append(cache[key])
  # Same key count on both sides => the key sets are identical.
1662     if len(cache) != len(disks):
1663       return (False, "ReconfigCache: wrong contents, extra items are present")
1665   if step == constants.DRBD_RECONF_RPC_INIT:
1666     # nothing to do beside the discovery/caching of the disks
1667     return (True, "Disks have been identified")
1668   elif step == constants.DRBD_RECONF_RPC_DISCONNECT:
  # Disconnect step: devices are taken into standalone network mode.
1673       except errors.BlockDeviceError, err:
1674         logger.Debug("Failed to go into standalone mode: %s" % str(err))
1675         return (False, "Can't change network configuration: %s" % str(err))
1676     return (True, "All disks are now disconnected")
1677   elif step == constants.DRBD_RECONF_RPC_RECONNECT:
  # Reconnect step: recreate the per-instance symlinks, then re-attach the
  # network with the (possibly new) multimaster setting.
1679     for cf, rd in zip(disks, bdevs):
1681         _SymlinkBlockDev(instance_name, rd.dev_path, cf.iv_name)
1682       except EnvironmentError, err:
1683         return (False, "Can't create symlink: %s" % str(err))
1684       # reconnect disks, switch to new master configuration and if
1685       # needed primary mode
1688         rd.ReAttachNet(multimaster)
1689       except errors.BlockDeviceError, err:
1690         return (False, "Can't change network configuration: %s" % str(err))
1691     # wait until the disks are connected; we need to retry the re-attach
1692     # if the device becomes standalone, as this might happen if the one
1693     # node disconnects and reconnects in a different mode before the
1694     # other node reconnects; in this case, one or both of the nodes will
1695     # decide it has wrong configuration and switch to standalone
1696     RECONNECT_TIMEOUT = 2 * 60
1697     sleep_time = 0.100 # start with 100 miliseconds
1698     timeout_limit = time.time() + RECONNECT_TIMEOUT
  # Poll with exponential backoff (capped at 5s) until every device is
  # connected or resyncing, or the 2-minute deadline passes.
1699     while time.time() < timeout_limit:
1700       all_connected = True
1702         stats = rd.GetProcStatus()
1703         if not (stats.is_connected or stats.is_in_resync):
1704           all_connected = False
1705         if stats.is_standalone:
1706           # peer had different config info and this node became
1707           # standalone, even though this should not happen with the
1708           # new staged way of changing disk configs
1710             rd.ReAttachNet(multimaster)
1711           except errors.BlockDeviceError, err:
1712             return (False, "Can't change network configuration: %s" % str(err))
1715       time.sleep(sleep_time)
1716       sleep_time = min(5, sleep_time * 1.5)
1717     if not all_connected:
1718       return (False, "Timeout in disk reconnecting")
1720       # change to primary mode
  # The primary-switch call itself is elided here; only the result message
  # depends on the multimaster flag.
1724       msg = "multi-master and primary"
1726       msg = "single-master"
1727     return (True, "Disks are now configured as %s" % msg)
1728   elif step == constants.DRBD_RECONF_RPC_SECONDARY:
  # Secondary step: mirror of CloseBlockDevices -- accumulate errors, drop
  # the instance symlinks, report combined status.
1733       except errors.BlockDeviceError, err:
1734         msg.append(str(err))
1735     _RemoveBlockDevLinks(instance_name, disks)
1737       return (False, "Can't make devices secondary: %s" % ",".join(msg))
1739     return (True, "All devices secondary")
1740   elif step == constants.DRBD_RECONF_RPC_WFSYNC:
  # Wait-for-sync step: returns (ok, (alldone, min_resync)) so the master
  # can poll sync progress.
1745       stats = rd.GetProcStatus()
1746       if not (stats.is_connected or stats.is_in_resync):
1749       alldone = alldone and (not stats.is_in_resync)
1750       if stats.sync_percent is not None:
1751         min_resync = min(min_resync, stats.sync_percent)
1752     return (not failure, (alldone, min_resync))
  # Unknown step values are rejected explicitly.
1754   return (False, "Unknown reconfiguration step %s" % step)
# Executes the hook scripts of a given phase (pre/post) for a hooks path.
# NOTE(review): interior lines are elided in this extract (class docstring
# body, suffix assignments, several branch/loop headers and the final
# return); kept byte-identical.
1757 class HooksRunner(object):
1760   This class is instantiated on the node side (ganeti-noded) and not on
  # Only simple names (letters, digits, _ and -) are accepted as hook files;
  # anything else (editor backups, etc.) is skipped.
1764   RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1766   def __init__(self, hooks_base_dir=None):
1767     """Constructor for hooks runner.
1770       - hooks_base_dir: if not None, this overrides the
1771         constants.HOOKS_BASE_DIR (useful for unittests)
1774     if hooks_base_dir is None:
1775       hooks_base_dir = constants.HOOKS_BASE_DIR
1776     self._BASE_DIR = hooks_base_dir
  # NOTE(review): a decorator line (presumably @staticmethod) appears to be
  # elided here -- ExecHook takes no 'self'.
1779   def ExecHook(script, env):
1780     """Exec one hook script.
1783       - script: the full path to the script
1784       - env: the environment with which to exec the script
1787     # exec the process using subprocess and log the output
  # stdin is /dev/null so hooks cannot block waiting for input; stderr is
  # folded into stdout so one read captures everything.
1790     fdstdin = open("/dev/null", "r")
1791     child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1792                              stderr=subprocess.STDOUT, close_fds=True,
1793                              shell=False, cwd="/", env=env)
  # Only the first 4096 bytes of output are kept.
1796       output = child.stdout.read(4096)
1797       child.stdout.close()
1798     except EnvironmentError, err:
1799       output += "Hook script error: %s" % str(err)
1803         result = child.wait()
  # EINTR during wait() is retried; the retry loop header is elided here.
1805       except EnvironmentError, err:
1806         if err.errno == errno.EINTR:
1810     # try not to leak fds
1811     for fd in (fdstdin, ):
1815       except EnvironmentError, err:
1816         # just log the error
1817         #logger.Error("While closing fd %s: %s" % (fd, err))
  # A hook succeeds iff its exit status is zero; output is returned for logs.
1820     return result == 0, output
1822   def RunHooks(self, hpath, phase, env):
1823     """Run the scripts in the hooks directory.
1825     This method will not be usually overriden by child opcodes.
1828     if phase == constants.HOOKS_PHASE_PRE:
1830     elif phase == constants.HOOKS_PHASE_POST:
1833       raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
  # Hooks live in "<hpath>-<pre|post>.d" under the base dir.
1836     subdir = "%s-%s.d" % (hpath, suffix)
1837     dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1839       dir_contents = utils.ListVisibleFiles(dir_name)
  # A missing hooks directory is not an error -- handling is elided here.
1840     except OSError, err:
1844     # we use the standard python sort order,
1845     # so 00name is the recommended naming scheme
1847     for relname in dir_contents:
1848       fname = os.path.join(dir_name, relname)
  # Non-files, non-executables and oddly-named entries are marked as skipped
  # rather than executed.
1849       if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1850               self.RE_MASK.match(relname) is not None):
1851         rrval = constants.HKR_SKIP
1854         result, output = self.ExecHook(fname, env)
1856           rrval = constants.HKR_FAIL
1858           rrval = constants.HKR_SUCCESS
  # Each entry: (relative script path, HKR_* status, captured output).
1859       rr.append(("%s/%s" % (subdir, relname), rrval, output))
# Runs an external instance-allocator script against a request written to a
# temporary file, relaying its stdout/stderr and status to the master.
# NOTE(review): interior lines are elided in this extract (temp-file write/
# close, the failure test and cleanup); kept byte-identical.
1864 class IAllocatorRunner(object):
1865   """IAllocator runner.
1867   This class is instantiated on the node side (ganeti-noded) and not on
1871   def Run(self, name, idata):
1872     """Run an iallocator script.
1874     Return value: tuple of:
1875       - run status (one of the IARUN_ constants)
1878       - fail reason (as from utils.RunResult)
  # The script is looked up by name along the configured search path only --
  # arbitrary paths are not accepted.
1881     alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
1883     if alloc_script is None:
1884       return (constants.IARUN_NOTFOUND, None, None, None)
  # The request (idata) is handed to the script via a temp file argument.
1886     fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
1890       result = utils.RunCmd([alloc_script, fin_name])
1892         return (constants.IARUN_FAILURE, result.stdout, result.stderr,
1897     return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
# File-based cache of block-device metadata (owner, primary/secondary state,
# instance-visible name), one small file per device under BDEV_CACHE_DIR.
# NOTE(review): interior lines are elided in this extract (@classmethod
# decorators, return statements, the state computation in UpdateCache and
# try: headers); kept byte-identical.
1900 class DevCacheManager(object):
1901   """Simple class for managing a cache of block device information.
1904   _DEV_PREFIX = "/dev/"
1905   _ROOT_DIR = constants.BDEV_CACHE_DIR
1908   def _ConvertPath(cls, dev_path):
1909     """Converts a /dev/name path to the cache file name.
1911     This replaces slashes with underscores and strips the /dev
1912     prefix. It then returns the full path to the cache file
  # "/dev/drbd/0" -> "<ROOT_DIR>/bdev_drbd_0": flat, filesystem-safe names.
1915     if dev_path.startswith(cls._DEV_PREFIX):
1916       dev_path = dev_path[len(cls._DEV_PREFIX):]
1917     dev_path = dev_path.replace("/", "_")
1918     fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1922   def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1923     """Updates the cache information for a given device.
  # A None path means the caller lost track of the device; log, don't crash.
1926     if dev_path is None:
1927       logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1929     fpath = cls._ConvertPath(dev_path)
  # Devices without an instance-visible name get a placeholder.
1935       iv_name = "not_visible"
  # Cache record format: "<owner> <state> <iv_name>\n" (state derivation
  # from on_primary is elided in this extract).
1936     fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1938       utils.WriteFile(fpath, data=fdata)
  # Cache writes are best-effort: failures are logged, never propagated.
1939     except EnvironmentError, err:
1940       logger.Error("Can't update bdev cache for %s, error %s" %
1941                    (dev_path, str(err)))
1944   def RemoveCache(cls, dev_path):
1945     """Remove data for a dev_path.
1948     if dev_path is None:
1949       logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1951     fpath = cls._ConvertPath(dev_path)
1953       utils.RemoveFile(fpath)
  # Removal is also best-effort, mirroring UpdateCache.
1954     except EnvironmentError, err:
1955       logger.Error("Can't update bdev cache for %s, error %s" %
1956                    (dev_path, str(err)))